[pypy-svn] r54060 - in pypy/branch/gc-improvements/pypy/rpython/memory: gc test

arigo at codespeak.net arigo at codespeak.net
Wed Apr 23 18:00:19 CEST 2008


Author: arigo
Date: Wed Apr 23 18:00:18 2008
New Revision: 54060

Modified:
   pypy/branch/gc-improvements/pypy/rpython/memory/gc/generation.py
   pypy/branch/gc-improvements/pypy/rpython/memory/gc/hybrid.py
   pypy/branch/gc-improvements/pypy/rpython/memory/gc/semispace.py
   pypy/branch/gc-improvements/pypy/rpython/memory/test/test_gc.py
Log:
Probable improvement in the hybrid gc: the second time a non-young
object is copied, move it to raw_malloc'ed memory so that further copies
are not necessary.  To do: check if it would be better to do this only
after some more copies.


Modified: pypy/branch/gc-improvements/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/gc-improvements/pypy/rpython/memory/gc/generation.py	(original)
+++ pypy/branch/gc-improvements/pypy/rpython/memory/gc/generation.py	Wed Apr 23 18:00:18 2008
@@ -373,13 +373,13 @@
         # weakref; otherwise invalidate the weakref
         while self.young_objects_with_weakrefs.non_empty():
             obj = self.young_objects_with_weakrefs.pop()
-            if not self.is_forwarded(obj):
+            if not self.surviving(obj):
                 continue # weakref itself dies
             obj = self.get_forwarding_address(obj)
             offset = self.weakpointer_offset(self.get_type_id(obj))
             pointing_to = (obj + offset).address[0]
             if self.is_in_nursery(pointing_to):
-                if self.is_forwarded(pointing_to):
+                if self.surviving(pointing_to):
                     (obj + offset).address[0] = self.get_forwarding_address(
                         pointing_to)
                 else:

Modified: pypy/branch/gc-improvements/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/branch/gc-improvements/pypy/rpython/memory/gc/hybrid.py	(original)
+++ pypy/branch/gc-improvements/pypy/rpython/memory/gc/hybrid.py	Wed Apr 23 18:00:18 2008
@@ -1,12 +1,14 @@
 import sys
 from pypy.rpython.memory.gc.semispace import SemiSpaceGC
-from pypy.rpython.memory.gc.generation import GenerationGC
+from pypy.rpython.memory.gc.generation import GenerationGC, GCFLAG_FORWARDED
+from pypy.rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
 from pypy.rpython.lltypesystem import llmemory, llarena
 from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
 from pypy.rlib.debug import ll_assert
 from pypy.rlib.rarithmetic import ovfcheck
 
 GCFLAG_UNVISITED = GenerationGC.first_unused_gcflag << 0
+GCFLAG_AGING = GenerationGC.first_unused_gcflag << 1
 
 
 class HybridGC(GenerationGC):
@@ -14,7 +16,7 @@
     except that objects above a certain size are handled separately:
     they are allocated via raw_malloc/raw_free in a mark-n-sweep fashion.
     """
-    first_unused_gcflag = GenerationGC.first_unused_gcflag << 1
+    first_unused_gcflag = GenerationGC.first_unused_gcflag << 2
 
     # the following values override the default arguments of __init__ when
     # translating to a real backend.
@@ -150,8 +152,7 @@
         self.large_objects_collect_trigger -= raw_malloc_usage(totalsize)
         if self.large_objects_collect_trigger < 0:
             self.semispace_collect()
-        # XXX maybe we should use llarena.arena_malloc above a certain size?
-        result = llmemory.raw_malloc(totalsize)
+        result = self.allocate_external_object(totalsize)
         if not result:
             raise MemoryError()
         # The parent classes guarantee zero-filled allocations, so we
@@ -161,6 +162,11 @@
         self.large_objects_list.append(result + size_gc_header)
         return result
 
+    def allocate_external_object(self, totalsize):
+        # XXX maybe we should use arena_malloc() above a certain size?
+        # If so, we'd also use arena_reset() in malloc_varsize_marknsweep().
+        return llmemory.raw_malloc(totalsize)
+
     # ___________________________________________________________________
     # the following methods are hook into SemiSpaceGC.semispace_collect()
 
@@ -171,6 +177,14 @@
         ll_assert(not self.pending_external_object_list.non_empty(),
                   "pending_external_object_list should be empty at start")
 
+    def surviving(self, obj):
+        # To use during a collection.  The objects that survive are the
+        # ones with GCFLAG_FORWARDED set and GCFLAG_UNVISITED not set.
+        # This is equivalent to self.is_forwarded() for all objects except
+        # the ones obtained by raw_malloc.
+        flags = self.header(obj).tid & (GCFLAG_FORWARDED|GCFLAG_UNVISITED)
+        return flags == GCFLAG_FORWARDED
+
     def visit_external_object(self, obj):
         hdr = self.header(obj)
         if hdr.tid & GCFLAG_UNVISITED:
@@ -178,6 +192,45 @@
             hdr.tid -= GCFLAG_UNVISITED
             self.pending_external_object_list.append(obj)
 
+    def make_a_copy(self, obj, objsize):
+        # During a full collect, all copied objects might implicitly come
+        # from the nursery.  If they do, we must add the GCFLAG_NO_YOUNG_PTRS.
+        # If they don't, we count how many times they are copied and when
+        # some threshold is reached we make the copy a non-movable "external"
+        # object.  For now we use a single flag GCFLAG_AGING, so threshold==2.
+        tid = self.header(obj).tid
+        if not (tid & GCFLAG_NO_YOUNG_PTRS):
+            tid |= GCFLAG_NO_YOUNG_PTRS    # object comes from the nursery
+        elif not (tid & GCFLAG_AGING):
+            tid |= GCFLAG_AGING
+        else:
+            newobj = self.make_a_nonmoving_copy(obj, objsize)
+            if newobj:
+                return newobj
+            tid &= ~GCFLAG_AGING
+        # skip GenerationGC.make_a_copy() as we already did the right
+        # thing about GCFLAG_NO_YOUNG_PTRS
+        newobj = SemiSpaceGC.make_a_copy(self, obj, objsize)
+        self.header(newobj).tid = tid
+        return newobj
+
+    def make_a_nonmoving_copy(self, obj, objsize):
+        # NB. the object can have a finalizer or be a weakref, but
+        # it's not an issue.
+        totalsize = self.size_gc_header() + objsize
+        newaddr = self.allocate_external_object(totalsize)
+        if not newaddr:
+            return llmemory.NULL   # can't raise MemoryError during a collect()
+
+        llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
+        newobj = newaddr + self.size_gc_header()
+        hdr = self.header(newobj)
+        hdr.tid |= self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
+        # GCFLAG_UNVISITED is not set
+        self.large_objects_list.append(newobj)
+        self.pending_external_object_list.append(newobj)
+        return newobj
+
     def scan_copied(self, scan):
         # Alternate between scanning the regular objects we just moved
         # and scanning the raw_malloc'ed object we just visited.

Modified: pypy/branch/gc-improvements/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/branch/gc-improvements/pypy/rpython/memory/gc/semispace.py	(original)
+++ pypy/branch/gc-improvements/pypy/rpython/memory/gc/semispace.py	Wed Apr 23 18:00:18 2008
@@ -342,6 +342,14 @@
         if pointer.address[0] != NULL:
             pointer.address[0] = self.copy(pointer.address[0])
 
+    def surviving(self, obj):
+        # To use during a collection.  Check if the object is currently
+        # marked as surviving the collection.  This is equivalent to
+        # self.is_forwarded() for all objects except the nonmoving objects
+        # created by the HybridGC subclass.  In all cases, if an object
+        # survives, self.get_forwarding_address() returns its new address.
+        return self.is_forwarded(obj)
+
     def is_forwarded(self, obj):
         return self.header(obj).tid & GCFLAG_FORWARDED != 0
         # note: all prebuilt objects also have this flag set
@@ -427,7 +435,7 @@
             x = self.objects_with_finalizers.popleft()
             ll_assert(self._finalization_state(x) != 1, 
                       "bad finalization state 1")
-            if self.is_forwarded(x):
+            if self.surviving(x):
                 new_with_finalizer.append(self.get_forwarding_address(x))
                 continue
             marked.append(x)
@@ -470,7 +478,7 @@
     _append_if_nonnull = staticmethod(_append_if_nonnull)
 
     def _finalization_state(self, obj):
-        if self.is_forwarded(obj):
+        if self.surviving(obj):
             newobj = self.get_forwarding_address(obj)
             hdr = self.header(newobj)
             if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
@@ -520,14 +528,14 @@
         new_with_weakref = self.AddressStack()
         while self.objects_with_weakrefs.non_empty():
             obj = self.objects_with_weakrefs.pop()
-            if not self.is_forwarded(obj):
+            if not self.surviving(obj):
                 continue # weakref itself dies
             obj = self.get_forwarding_address(obj)
             offset = self.weakpointer_offset(self.get_type_id(obj))
             pointing_to = (obj + offset).address[0]
             # XXX I think that pointing_to cannot be NULL here
             if pointing_to:
-                if self.is_forwarded(pointing_to):
+                if self.surviving(pointing_to):
                     (obj + offset).address[0] = self.get_forwarding_address(
                         pointing_to)
                     new_with_weakref.append(obj)

Modified: pypy/branch/gc-improvements/pypy/rpython/memory/test/test_gc.py
==============================================================================
--- pypy/branch/gc-improvements/pypy/rpython/memory/test/test_gc.py	(original)
+++ pypy/branch/gc-improvements/pypy/rpython/memory/test/test_gc.py	Wed Apr 23 18:00:18 2008
@@ -428,3 +428,53 @@
             return len("".join(lst))
         res = self.interpret(concat, [100])
         assert res == concat(100)
+
+    def test_longliving_weakref(self):
+        # test for the case where a weakref points to a very old object
+        # that was made non-movable after several collections
+        import gc, weakref
+        class A:
+            pass
+        def step1(x):
+            a = A()
+            a.x = 42
+            ref = weakref.ref(a)
+            i = 0
+            while i < x:
+                gc.collect()
+                i += 1
+            assert ref() is a
+            assert ref().x == 42
+            return ref
+        def step2(ref):
+            gc.collect()       # 'a' is freed here
+            assert ref() is None
+        def f(x):
+            ref = step1(x)
+            step2(ref)
+        self.interpret(f, [10])
+
+    def test_longliving_object_with_finalizer(self):
+        class B(object):
+            pass
+        b = B()
+        b.nextid = 0
+        b.num_deleted = 0
+        class A(object):
+            def __init__(self):
+                self.id = b.nextid
+                b.nextid += 1
+            def __del__(self):
+                b.num_deleted += 1
+        def f(x):
+            a = A()
+            i = 0
+            while i < x:
+                i += 1
+                a = A()
+                llop.gc__collect(lltype.Void)
+            llop.gc__collect(lltype.Void)
+            llop.gc__collect(lltype.Void)
+            return b.num_deleted
+        res = self.interpret(f, [15])
+        assert res == 16



More information about the Pypy-commit mailing list