[pypy-svn] r77185 - in pypy/trunk/pypy: rlib rpython rpython/lltypesystem rpython/memory rpython/memory/gc rpython/memory/gc/test rpython/memory/gctransform rpython/memory/test

arigo at codespeak.net arigo at codespeak.net
Mon Sep 20 09:48:08 CEST 2010


Author: arigo
Date: Mon Sep 20 09:48:06 2010
New Revision: 77185

Modified:
   pypy/trunk/pypy/rlib/rarithmetic.py
   pypy/trunk/pypy/rpython/llinterp.py
   pypy/trunk/pypy/rpython/lltypesystem/ll2ctypes.py
   pypy/trunk/pypy/rpython/lltypesystem/llarena.py
   pypy/trunk/pypy/rpython/lltypesystem/llheap.py
   pypy/trunk/pypy/rpython/lltypesystem/llmemory.py
   pypy/trunk/pypy/rpython/lltypesystem/lloperation.py
   pypy/trunk/pypy/rpython/memory/gc/base.py
   pypy/trunk/pypy/rpython/memory/gc/minimark.py
   pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py
   pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py
   pypy/trunk/pypy/rpython/memory/gctransform/framework.py
   pypy/trunk/pypy/rpython/memory/gcwrapper.py
   pypy/trunk/pypy/rpython/memory/test/test_gc.py
   pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py
Log:
Merge branch/gc-cardmark, adding card marking to the minimark GC.

(the branch was only local to my working copy.)


Modified: pypy/trunk/pypy/rlib/rarithmetic.py
==============================================================================
--- pypy/trunk/pypy/rlib/rarithmetic.py	(original)
+++ pypy/trunk/pypy/rlib/rarithmetic.py	Mon Sep 20 09:48:06 2010
@@ -50,6 +50,11 @@
 LONG_MASK = _Ltest*2-1
 LONG_TEST = _Ltest
 
+LONG_BIT_SHIFT = 0
+while (1 << LONG_BIT_SHIFT) != LONG_BIT:
+    LONG_BIT_SHIFT += 1
+    assert LONG_BIT_SHIFT < 99, "LONG_BIT_SHIFT value not found?"
+
 INFINITY = 1e200 * 1e200
 NAN = INFINITY / INFINITY
 

Modified: pypy/trunk/pypy/rpython/llinterp.py
==============================================================================
--- pypy/trunk/pypy/rpython/llinterp.py	(original)
+++ pypy/trunk/pypy/rpython/llinterp.py	Mon Sep 20 09:48:06 2010
@@ -650,7 +650,7 @@
         offsets, fieldvalue = fieldnamesval[:-1], fieldnamesval[-1]
         inneraddr, FIELD = self.getinneraddr(obj, *offsets)
         if FIELD is not lltype.Void:
-            self.heap.setinterior(obj, inneraddr, FIELD, fieldvalue)
+            self.heap.setinterior(obj, inneraddr, FIELD, fieldvalue, offsets)
 
     def op_bare_setinteriorfield(self, obj, *fieldnamesval):
         offsets, fieldvalue = fieldnamesval[:-1], fieldnamesval[-1]

Modified: pypy/trunk/pypy/rpython/lltypesystem/ll2ctypes.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/ll2ctypes.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/ll2ctypes.py	Mon Sep 20 09:48:06 2010
@@ -353,6 +353,7 @@
     """Put ctypes_storage on the instance, changing its __class__ so that it
     sees the methods of the given mixin class."""
     assert not isinstance(instance, _parentable_mixin)  # not yet
+    ctypes_storage._preserved_hash = hash(instance)
     subcls = get_common_subclass(mixin_cls, instance.__class__)
     instance.__class__ = subcls
     instance._storage = ctypes_storage
@@ -402,6 +403,8 @@
 
     def __hash__(self):
         if self._storage is not None:
+            if hasattr(self._storage, '_preserved_hash'):
+                return self._storage._preserved_hash
             return ctypes.addressof(self._storage)
         else:
             return object.__hash__(self)

Modified: pypy/trunk/pypy/rpython/lltypesystem/llarena.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llarena.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llarena.py	Mon Sep 20 09:48:06 2010
@@ -69,7 +69,7 @@
             raise ArenaError("Address offset is outside the arena")
         return fakearenaaddress(self, offset)
 
-    def allocate_object(self, offset, size):
+    def allocate_object(self, offset, size, letter='x'):
         self.check()
         bytes = llmemory.raw_malloc_usage(size)
         if offset + bytes > self.nbytes:
@@ -84,7 +84,7 @@
                 raise ArenaError("new object overlaps a previous object")
         assert offset not in self.objectptrs
         addr2 = size._raw_malloc([], zero=zero)
-        pattern = 'X' + 'x'*(bytes-1)
+        pattern = letter.upper() + letter*(bytes-1)
         self.usagemap[offset:offset+bytes] = array.array('c', pattern)
         self.setobject(addr2, offset, bytes)
         # common case: 'size' starts with a GCHeaderOffset.  In this case
@@ -324,10 +324,13 @@
     this is used to know what type of lltype object to allocate."""
     from pypy.rpython.memory.lltypelayout import memory_alignment
     addr = getfakearenaaddress(addr)
-    if check_alignment and (addr.offset & (memory_alignment-1)) != 0:
+    letter = 'x'
+    if llmemory.raw_malloc_usage(size) == 1:
+        letter = 'b'    # for Byte-aligned allocations
+    elif check_alignment and (addr.offset & (memory_alignment-1)) != 0:
         raise ArenaError("object at offset %d would not be correctly aligned"
                          % (addr.offset,))
-    addr.arena.allocate_object(addr.offset, size)
+    addr.arena.allocate_object(addr.offset, size, letter)
 
 def arena_shrink_obj(addr, newsize):
     """ Mark object as shorter than it was
@@ -471,6 +474,7 @@
                   sandboxsafe=True)
 
 def llimpl_arena_free(arena_addr):
+    # NB. minimark.py assumes that arena_free() is actually just a raw_free().
     llmemory.raw_free(arena_addr)
 register_external(arena_free, [llmemory.Address], None, 'll_arena.arena_free',
                   llimpl=llimpl_arena_free,

Modified: pypy/trunk/pypy/rpython/lltypesystem/llheap.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llheap.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llheap.py	Mon Sep 20 09:48:06 2010
@@ -8,7 +8,8 @@
 from pypy.rlib.rgc import collect
 from pypy.rlib.rgc import can_move
 
-def setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue):
+def setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue,
+                offsets=None):
     assert typeOf(newvalue) == INNERTYPE
     # xxx access the address object's ref() directly for performance
     inneraddr.ref()[0] = newvalue

Modified: pypy/trunk/pypy/rpython/lltypesystem/llmemory.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llmemory.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llmemory.py	Mon Sep 20 09:48:06 2010
@@ -409,6 +409,9 @@
         if self.ptr is None:
             s = 'NULL'
         else:
+            #try:
+            #    s = hex(self.ptr._cast_to_int())
+            #except:
             s = str(self.ptr)
         return '<fakeaddr %s>' % (s,)
 

Modified: pypy/trunk/pypy/rpython/lltypesystem/lloperation.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/lloperation.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/lloperation.py	Mon Sep 20 09:48:06 2010
@@ -436,6 +436,7 @@
     'do_malloc_fixedsize_clear':LLOp(canraise=(MemoryError,),canunwindgc=True),
     'do_malloc_varsize_clear':  LLOp(canraise=(MemoryError,),canunwindgc=True),
     'get_write_barrier_failing_case': LLOp(sideeffects=False),
+    'get_write_barrier_from_array_failing_case': LLOp(sideeffects=False),
     'gc_get_type_info_group': LLOp(sideeffects=False),
 
     # __________ GC operations __________

Modified: pypy/trunk/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/base.py	Mon Sep 20 09:48:06 2010
@@ -200,6 +200,39 @@
                 length -= 1
     trace._annspecialcase_ = 'specialize:arg(2)'
 
+    def trace_partial(self, obj, start, stop, callback, arg):
+        """Like trace(), but only walk the array part, for indices in
+        range(start, stop).  Must only be called if has_gcptr_in_varsize().
+        """
+        length = stop - start
+        typeid = self.get_type_id(obj)
+        if self.is_gcarrayofgcptr(typeid):
+            # a performance shortcut for GcArray(gcptr)
+            item = obj + llmemory.gcarrayofptr_itemsoffset
+            item += llmemory.gcarrayofptr_singleitemoffset * start
+            while length > 0:
+                if self.points_to_valid_gc_object(item):
+                    callback(item, arg)
+                item += llmemory.gcarrayofptr_singleitemoffset
+                length -= 1
+            return
+        ll_assert(self.has_gcptr_in_varsize(typeid),
+                  "trace_partial() on object without has_gcptr_in_varsize()")
+        item = obj + self.varsize_offset_to_variable_part(typeid)
+        offsets = self.varsize_offsets_to_gcpointers_in_var_part(typeid)
+        itemlength = self.varsize_item_sizes(typeid)
+        item += itemlength * start
+        while length > 0:
+            j = 0
+            while j < len(offsets):
+                itemobj = item + offsets[j]
+                if self.points_to_valid_gc_object(itemobj):
+                    callback(itemobj, arg)
+                j += 1
+            item += itemlength
+            length -= 1
+    trace_partial._annspecialcase_ = 'specialize:arg(4)'
+
     def points_to_valid_gc_object(self, addr):
         return self.is_valid_gc_object(addr.address[0])
 

Modified: pypy/trunk/pypy/rpython/memory/gc/minimark.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/minimark.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/minimark.py	Mon Sep 20 09:48:06 2010
@@ -5,8 +5,10 @@
 from pypy.rpython.memory.gc import minimarkpage, base, generation
 from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
 from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask, r_uint
+from pypy.rlib.rarithmetic import LONG_BIT_SHIFT
 from pypy.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.tool.sourcetools import func_with_new_name
 
 WORD = LONG_BIT // 8
 NULL = llmemory.NULL
@@ -40,6 +42,12 @@
 # collection.  See pypy/doc/discussion/finalizer-order.txt
 GCFLAG_FINALIZATION_ORDERING = first_gcflag << 4
 
+# The following flag is set on externally raw_malloc'ed arrays of pointers.
+# They are allocated with some extra space in front of them for a bitfield,
+# one bit per 'card_page_indices' indices.
+GCFLAG_HAS_CARDS    = first_gcflag << 5
+GCFLAG_CARDS_SET    = first_gcflag << 6     # <- at least one card bit is set
+
 
 FORWARDSTUB = lltype.GcStruct('forwarding_stub',
                               ('forw', llmemory.Address))
@@ -110,6 +118,14 @@
         # total size is now more than 'major_collection_threshold' times,
         # we trigger the next major collection.
         "major_collection_threshold": 1.82,
+
+        # The number of array indices that are mapped to a single bit in
+        # write_barrier_from_array().  Must be a power of two.  The default
+        # value of 128 means that card pages are 512 bytes (1024 on 64-bits)
+        # in regular arrays of pointers; more in arrays whose items are
+        # larger.  A value of 0 disables card marking.
+        "card_page_indices": 128,
+        "card_page_indices_min": 800,    # minimum number of indices for cards
         }
 
     def __init__(self, config, chunk_size=DEFAULT_CHUNK_SIZE,
@@ -119,6 +135,8 @@
                  arena_size=64*WORD,
                  small_request_threshold=5*WORD,
                  major_collection_threshold=2.5,
+                 card_page_indices=0,
+                 card_page_indices_min=None,
                  ArenaCollectionClass=None):
         MovingGCBase.__init__(self, config, chunk_size)
         assert small_request_threshold % WORD == 0
@@ -130,6 +148,13 @@
         self.max_heap_size = 0.0
         self.max_heap_size_already_raised = False
         #
+        self.card_page_indices = card_page_indices
+        if self.card_page_indices > 0:
+            self.card_page_indices_min = card_page_indices_min
+            self.card_page_shift = 0
+            while (1 << self.card_page_shift) < self.card_page_indices:
+                self.card_page_shift += 1
+        #
         self.nursery      = NULL
         self.nursery_free = NULL
         self.nursery_top  = NULL
@@ -145,6 +170,13 @@
         # the write barrier.
         self.old_objects_pointing_to_young = self.AddressStack()
         #
+        # Similar to 'old_objects_pointing_to_young', but lists objects
+        # that have the GCFLAG_CARDS_SET bit.  For large arrays.  Note
+        # that it is possible for an object to be listed both in here
+        # and in 'old_objects_pointing_to_young', in which case we
+        # should just clear the cards and trace it fully, as usual.
+        self.old_objects_with_cards_set = self.AddressStack()
+        #
         # A list of all prebuilt GC objects that contain pointers to the heap
         self.prebuilt_root_objects = self.AddressStack()
         #
@@ -292,7 +324,7 @@
         # If totalsize is greater than small_request_threshold, ask for
         # a rawmalloc.
         if llmemory.raw_malloc_usage(totalsize) > self.small_request_threshold:
-            result = self._external_malloc(typeid, totalsize)
+            result = self._external_malloc_cardmark(typeid, totalsize, length)
             #
         else:
             # Round the size up to the next multiple of WORD.  Note that
@@ -364,51 +396,85 @@
             self.minor_collection()
             self.major_collection(reserving_size)
 
-    def _reserve_external_memory(self, totalsize):
-        """Do a raw_malloc() to get some external memory.
-        Note that the returned memory is not cleared."""
-        #
-        result = llmemory.raw_malloc(totalsize)
-        if not result:
-            raise MemoryError("cannot allocate large object")
-        #
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        self.rawmalloced_total_size += llmemory.raw_malloc_usage(totalsize)
-        self.rawmalloced_objects.append(result + size_gc_header)
-        return result
-
     def _external_malloc(self, typeid, totalsize):
         """Allocate a large object using raw_malloc()."""
+        return self._external_malloc_cardmark(typeid, totalsize, 0)
+
+
+    def _external_malloc_cardmark(self, typeid, totalsize, length):
+        """Allocate a large object using raw_malloc(), possibly as an
+        object with card marking enabled, if its length is large enough.
+        'length' can be specified as 0 if the object is not varsized."""
         #
-        # If somebody calls _external_malloc() a lot, we must eventually
+        # If somebody calls this function a lot, we must eventually
         # force a full collection.
         self._full_collect_if_needed(totalsize)
         #
-        result = self._reserve_external_memory(totalsize)
-        llmemory.raw_memclear(result, totalsize)
-        self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS)
+        # Check if we need to introduce the card marker bits area.
+        if (self.card_page_indices <= 0     # <- this check is constant-folded
+            or length < self.card_page_indices_min   # <- must be large enough
+            or not self.has_gcptr_in_varsize(typeid)):  # <- must contain ptrs
+            #
+            # In these cases, we don't want a card marker bits area.
+            cardheadersize = 0
+            extra_flags = 0
+            #
+        else:
+            # Reserve N extra words containing card bits before the object.
+            extra_words = self.card_marking_words_for_length(length)
+            cardheadersize = WORD * extra_words
+            extra_flags = GCFLAG_HAS_CARDS
+        #
+        allocsize = cardheadersize + llmemory.raw_malloc_usage(totalsize)
+        #
+        # Allocate the object using arena_malloc(), which we assume here
+        # is just the same as raw_malloc(), but allows the extra flexibility
+        # of saying that we have extra words in the header.
+        arena = llarena.arena_malloc(allocsize, False)
+        if not arena:
+            raise MemoryError("cannot allocate large object")
+        #
+        # Clear it using method 2 of llarena.arena_reset(), which is the
+        # same as just a raw_memclear().
+        llarena.arena_reset(arena, allocsize, 2)
+        #
+        # Reserve the card mark as a list of single bytes
+        # (the loop is empty in C).
+        i = 0
+        while i < cardheadersize:
+            llarena.arena_reserve(arena + i, llmemory.sizeof(lltype.Char))
+            i += 1
+        #
+        # Initialize the object.
+        result = arena + cardheadersize
+        llarena.arena_reserve(result, totalsize)
+        self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS | extra_flags)
+        #
+        # Record the newly allocated object and its size.
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        self.rawmalloced_total_size += llmemory.raw_malloc_usage(totalsize)
+        self.rawmalloced_objects.append(result + size_gc_header)
         return result
-    _external_malloc._dont_inline_ = True
+    _external_malloc_cardmark._dont_inline_ = True
 
 
     def _malloc_nonmovable(self, typeid, totalsize):
         """Allocate an object non-movable."""
         #
-        # If somebody calls _malloc_nonmovable() a lot, we must eventually
-        # force a full collection.
-        self._full_collect_if_needed(totalsize)
-        #
         rawtotalsize = llmemory.raw_malloc_usage(totalsize)
-        if rawtotalsize <= self.small_request_threshold:
-            #
-            # Ask the ArenaCollection to do the malloc.
-            totalsize = llarena.round_up_for_allocation(totalsize)
-            result = self.ac.malloc(totalsize)
+        if rawtotalsize > self.small_request_threshold:
             #
-        else:
             # The size asked for is too large for the ArenaCollection.
-            result = self._reserve_external_memory(totalsize)
+            return self._external_malloc(typeid, totalsize)
+        #
+        totalsize = llarena.round_up_for_allocation(totalsize)
+        #
+        # If somebody calls _malloc_nonmovable() a lot, we must eventually
+        # force a full collection.
+        self._full_collect_if_needed(totalsize)
         #
+        # Ask the ArenaCollection to do the malloc.
+        result = self.ac.malloc(totalsize)
         llmemory.raw_memclear(result, totalsize)
         self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS)
         return result
@@ -446,6 +512,7 @@
         # Only objects in the nursery can be "resized".  Resizing them
         # means recording that they have a smaller size, so that when
         # moved out of the nursery, they will consume less memory.
+        # In particular, an array with GCFLAG_HAS_CARDS is never resized.
         if not self.is_in_nursery(obj):
             return False
         #
@@ -492,7 +559,7 @@
             gcref = self.malloc_varsize_nonmovable(typeid, length)
         else:
             gcref = self.malloc_fixedsize_nonmovable(typeid)
-        return gcref
+        return llmemory.cast_ptr_to_adr(gcref)
 
 
     # ----------
@@ -540,6 +607,24 @@
         """
         return self.ac.total_memory_used + self.rawmalloced_total_size
 
+    def card_marking_words_for_length(self, length):
+        # --- Unoptimized version:
+        #num_bits = ((length-1) >> self.card_page_shift) + 1
+        #return (num_bits + (LONG_BIT - 1)) >> LONG_BIT_SHIFT
+        # --- Optimized version:
+        return intmask(
+            ((r_uint(length) + ((LONG_BIT << self.card_page_shift) - 1)) >>
+             (self.card_page_shift + LONG_BIT_SHIFT)))
+
+    def card_marking_bytes_for_length(self, length):
+        # --- Unoptimized version:
+        #num_bits = ((length-1) >> self.card_page_shift) + 1
+        #return (num_bits + 7) >> 3
+        # --- Optimized version:
+        return intmask(
+            ((r_uint(length) + ((8 << self.card_page_shift) - 1)) >>
+             (self.card_page_shift + 3)))
+
     def debug_check_object(self, obj):
         # after a minor or major collection, no object should be in the nursery
         ll_assert(not self.is_in_nursery(obj),
@@ -557,6 +642,30 @@
         # the GCFLAG_FINALIZATION_ORDERING should not be set between coll.
         ll_assert(self.header(obj).tid & GCFLAG_FINALIZATION_ORDERING == 0,
                   "unexpected GCFLAG_FINALIZATION_ORDERING")
+        # the GCFLAG_CARDS_SET should not be set between collections
+        ll_assert(self.header(obj).tid & GCFLAG_CARDS_SET == 0,
+                  "unexpected GCFLAG_CARDS_SET")
+        # if the GCFLAG_HAS_CARDS is set, check that all bits are zero now
+        if self.header(obj).tid & GCFLAG_HAS_CARDS:
+            ll_assert(self.card_page_indices > 0,
+                      "GCFLAG_HAS_CARDS but not using card marking")
+            typeid = self.get_type_id(obj)
+            ll_assert(self.has_gcptr_in_varsize(typeid),
+                      "GCFLAG_HAS_CARDS but not has_gcptr_in_varsize")
+            ll_assert(self.header(obj).tid & GCFLAG_NO_HEAP_PTRS == 0,
+                      "GCFLAG_HAS_CARDS && GCFLAG_NO_HEAP_PTRS")
+            offset_to_length = self.varsize_offset_to_length(typeid)
+            length = (obj + offset_to_length).signed[0]
+            extra_words = self.card_marking_words_for_length(length)
+            #
+            size_gc_header = self.gcheaderbuilder.size_gc_header
+            p = llarena.getfakearenaaddress(obj - size_gc_header)
+            i = extra_words * WORD
+            while i > 0:
+                p -= 1
+                ll_assert(p.char[0] == '\x00',
+                          "the card marker bits are not cleared")
+                i -= 1
 
     # ----------
     # Write barrier
@@ -570,6 +679,14 @@
         if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
             self.remember_young_pointer(addr_struct, newvalue)
 
+    def write_barrier_from_array(self, newvalue, addr_array, index):
+        if self.header(addr_array).tid & GCFLAG_NO_YOUNG_PTRS:
+            if self.card_page_indices > 0:     # <- constant-folded
+                self.remember_young_pointer_from_array(addr_array, index,
+                                                       newvalue)
+            else:
+                self.remember_young_pointer(addr_array, newvalue)
+
     def _init_writebarrier_logic(self):
         # The purpose of attaching remember_young_pointer to the instance
         # instead of keeping it as a regular method is to help the JIT call it.
@@ -614,6 +731,63 @@
 
         remember_young_pointer._dont_inline_ = True
         self.remember_young_pointer = remember_young_pointer
+        #
+        if self.card_page_indices > 0:
+            self._init_writebarrier_with_card_marker()
+
+
+    def _init_writebarrier_with_card_marker(self):
+        def remember_young_pointer_from_array(addr_array, index, addr):
+            # 'addr_array' is the address of the object in which we write,
+            # which must have an array part;  'index' is the index of the
+            # item that is (or contains) the pointer that we write;
+            # 'addr' is the address that we write in the array.
+            objhdr = self.header(addr_array)
+            if objhdr.tid & GCFLAG_HAS_CARDS == 0:
+                #
+                # no cards, use default logic.  The 'nocard_logic()' is just
+                # 'remember_young_pointer()', but forced to be inlined here.
+                nocard_logic(addr_array, addr)
+                return
+            #
+            # 'addr_array' is a raw_malloc'ed array with card markers
+            # in front.  Compute the index of the bit to set:
+            bitindex = index >> self.card_page_shift
+            byteindex = bitindex >> 3
+            bitmask = 1 << (bitindex & 7)
+            #
+            # If the bit is already set, leave now.
+            size_gc_header = self.gcheaderbuilder.size_gc_header
+            addr_byte = addr_array - size_gc_header
+            addr_byte = llarena.getfakearenaaddress(addr_byte) + (~byteindex)
+            byte = ord(addr_byte.char[0])
+            if byte & bitmask:
+                return
+            #
+            # As in remember_young_pointer, check if 'addr' is a valid
+            # pointer, in case it can be a tagged integer
+            if (self.config.taggedpointers and
+                not self.is_valid_gc_object(addr)):
+                return
+            #
+            # If the 'addr' is in the nursery, then we need to set the flag.
+            # Note that the following check is done after the bit check
+            # above, because it is expected that the "bit already set"
+            # situation is the most common.
+            if self.is_in_nursery(addr):
+                addr_byte.char[0] = chr(byte | bitmask)
+                #
+                if objhdr.tid & GCFLAG_CARDS_SET == 0:
+                    self.old_objects_with_cards_set.append(addr_array)
+                    objhdr.tid |= GCFLAG_CARDS_SET
+
+        nocard_logic = func_with_new_name(self.remember_young_pointer,
+                                          'remember_young_pointer_nocard')
+        del nocard_logic._dont_inline_
+        nocard_logic._always_inline_ = True
+        remember_young_pointer_from_array._dont_inline_ = True
+        self.remember_young_pointer_from_array = (
+            remember_young_pointer_from_array)
 
 
     def assume_young_pointers(self, addr_struct):
@@ -641,7 +815,8 @@
             return True
         # ^^^ a fast path of write-barrier
         #
-        if source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
+        if (source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0 or
+            source_hdr.tid & GCFLAG_CARDS_SET != 0):
             # there might be an object in source that is in nursery
             self.old_objects_pointing_to_young.append(dest_addr)
             dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
@@ -670,6 +845,11 @@
         # 'old_objects_pointing_to_young'.
         self.collect_roots_in_nursery()
         #
+        # If we are using card marking, do a partial trace of the arrays
+        # that are flagged with GCFLAG_CARDS_SET.
+        if self.card_page_indices > 0:
+            self.collect_cardrefs_to_nursery()
+        #
         # Now trace objects from 'old_objects_pointing_to_young'.
         # All nursery objects they reference are copied out of the
         # nursery, and again added to 'old_objects_pointing_to_young'.
@@ -707,6 +887,61 @@
             MiniMarkGC._trace_drag_out1,  # static in prebuilt non-gc
             None)                         # static in prebuilt gc
 
+    def collect_cardrefs_to_nursery(self):
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        oldlist = self.old_objects_with_cards_set
+        while oldlist.non_empty():
+            obj = oldlist.pop()
+            #
+            # Remove the GCFLAG_CARDS_SET flag.
+            ll_assert(self.header(obj).tid & GCFLAG_CARDS_SET != 0,
+                "!GCFLAG_CARDS_SET but object in 'old_objects_with_cards_set'")
+            self.header(obj).tid &= ~GCFLAG_CARDS_SET
+            #
+            # Get the number of card marker bytes in the header.
+            typeid = self.get_type_id(obj)
+            offset_to_length = self.varsize_offset_to_length(typeid)
+            length = (obj + offset_to_length).signed[0]
+            bytes = self.card_marking_bytes_for_length(length)
+            p = llarena.getfakearenaaddress(obj - size_gc_header)
+            #
+            # If the object doesn't have GCFLAG_NO_YOUNG_PTRS, then it
+            # means that it is in 'old_objects_pointing_to_young' and
+            # will be fully traced by collect_oldrefs_to_nursery() just
+            # afterwards.
+            if self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS == 0:
+                #
+                # In that case, we just have to reset all card bits.
+                while bytes > 0:
+                    p -= 1
+                    p.char[0] = '\x00'
+                    bytes -= 1
+                #
+            else:
+                # Walk the bytes encoding the card marker bits, and for
+                # each bit set, call trace_and_drag_out_of_nursery_partial().
+                interval_start = 0
+                while bytes > 0:
+                    p -= 1
+                    cardbyte = ord(p.char[0])
+                    p.char[0] = '\x00'           # reset the bits
+                    bytes -= 1
+                    next_byte_start = interval_start + 8*self.card_page_indices
+                    #
+                    while cardbyte != 0:
+                        interval_stop = interval_start + self.card_page_indices
+                        #
+                        if cardbyte & 1:
+                            if interval_stop > length:
+                                interval_stop = length
+                            self.trace_and_drag_out_of_nursery_partial(
+                                obj, interval_start, interval_stop)
+                        #
+                        interval_start = interval_stop
+                        cardbyte >>= 1
+                    interval_start = next_byte_start
+
+
     def collect_oldrefs_to_nursery(self):
         # Follow the old_objects_pointing_to_young list and move the
         # young objects they point to out of the nursery.
@@ -729,6 +964,15 @@
         """
         self.trace(obj, self._trace_drag_out, None)
 
+    def trace_and_drag_out_of_nursery_partial(self, obj, start, stop):
+        """Like trace_and_drag_out_of_nursery(), but limited to the array
+        indices in range(start, stop).
+        """
+        ll_assert(start < stop, "empty or negative range "
+                                "in trace_and_drag_out_of_nursery_partial()")
+        #print 'trace_partial:', start, stop, '\t', obj
+        self.trace_partial(obj, start, stop, self._trace_drag_out, None)
+
 
     def _trace_drag_out1(self, root):
         self._trace_drag_out(root, None)
@@ -917,7 +1161,22 @@
                 totalsize = size_gc_header + self.get_size(obj)
                 rawtotalsize = llmemory.raw_malloc_usage(totalsize)
                 self.rawmalloced_total_size -= rawtotalsize
-                llmemory.raw_free(obj - size_gc_header)
+                arena = llarena.getfakearenaaddress(obj - size_gc_header)
+                #
+                # Must also include the card marker area, if any
+                if (self.card_page_indices > 0    # <- this is constant-folded
+                    and self.header(obj).tid & GCFLAG_HAS_CARDS):
+                    #
+                    # Get the length and compute the number of extra bytes
+                    typeid = self.get_type_id(obj)
+                    ll_assert(self.has_gcptr_in_varsize(typeid),
+                              "GCFLAG_HAS_CARDS but not has_gcptr_in_varsize")
+                    offset_to_length = self.varsize_offset_to_length(typeid)
+                    length = (obj + offset_to_length).signed[0]
+                    extra_words = self.card_marking_words_for_length(length)
+                    arena -= extra_words * WORD
+                #
+                llarena.arena_free(arena)
         #
         list.delete()
 

Modified: pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py	Mon Sep 20 09:48:06 2010
@@ -95,7 +95,10 @@
         if self.gc.needs_write_barrier:
             newaddr = llmemory.cast_ptr_to_adr(newvalue)
             addr_struct = llmemory.cast_ptr_to_adr(p)
-            self.gc.write_barrier(newaddr, addr_struct)
+            if hasattr(self.gc, 'write_barrier_from_array'):
+                self.gc.write_barrier_from_array(newaddr, addr_struct, index)
+            else:
+                self.gc.write_barrier(newaddr, addr_struct)
         p[index] = newvalue
 
     def malloc(self, TYPE, n=None):
@@ -485,5 +488,27 @@
     # test the GC itself, providing a simple class for ArenaCollection
     GC_PARAMS = {'ArenaCollectionClass': SimpleArenaCollection}
 
+    def test_card_marker(self):
+        for arraylength in (range(4, 17)
+                            + [69]      # 3 bytes
+                            + [300]):   # 10 bytes
+            print 'array length:', arraylength
+            nums = {}
+            a = self.malloc(VAR, arraylength)
+            self.stackroots.append(a)
+            for i in range(50):
+                p = self.malloc(S)
+                p.x = -i
+                a = self.stackroots[-1]
+                index = (i*i) % arraylength
+                self.writearray(a, index, p)
+                nums[index] = p.x
+                #
+                for index, expected_x in nums.items():
+                    assert a[index].x == expected_x
+            self.stackroots.pop()
+    test_card_marker.GC_PARAMS = {"card_page_indices": 4,
+                                  "card_page_indices_min": 7}
+
 class TestMiniMarkGCFull(DirectGCTest):
     from pypy.rpython.memory.gc.minimark import MiniMarkGC as GCClass

Modified: pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py	Mon Sep 20 09:48:06 2010
@@ -1,5 +1,6 @@
 from pypy.rpython.lltypesystem import llmemory
 from pypy.rpython.memory.gc.minimark import MiniMarkGC
+from pypy.rlib.rarithmetic import LONG_BIT
 
 # Note that most tests are in test_direct.py.
 
@@ -23,3 +24,27 @@
     size2 = llmemory.raw_malloc_usage(llmemory.sizeof(UNICODE, INIT_SIZE))
     size2 = size_gc_header + size2
     assert size2 <= MiniMarkGC.TRANSLATION_PARAMS["small_request_threshold"]
+
+def test_card_marking_words_for_length():
+    gc = MiniMarkGC(None, card_page_indices=128)
+    assert gc.card_page_shift == 7
+    P = 128 * LONG_BIT
+    assert gc.card_marking_words_for_length(1) == 1
+    assert gc.card_marking_words_for_length(P) == 1
+    assert gc.card_marking_words_for_length(P+1) == 2
+    assert gc.card_marking_words_for_length(P+P) == 2
+    assert gc.card_marking_words_for_length(P+P+1) == 3
+    assert gc.card_marking_words_for_length(P+P+P+P+P+P+P+P) == 8
+    assert gc.card_marking_words_for_length(P+P+P+P+P+P+P+P+1) == 9
+
+def test_card_marking_bytes_for_length():
+    gc = MiniMarkGC(None, card_page_indices=128)
+    assert gc.card_page_shift == 7
+    P = 128 * 8
+    assert gc.card_marking_bytes_for_length(1) == 1
+    assert gc.card_marking_bytes_for_length(P) == 1
+    assert gc.card_marking_bytes_for_length(P+1) == 2
+    assert gc.card_marking_bytes_for_length(P+P) == 2
+    assert gc.card_marking_bytes_for_length(P+P+1) == 3
+    assert gc.card_marking_bytes_for_length(P+P+P+P+P+P+P+P) == 8
+    assert gc.card_marking_bytes_for_length(P+P+P+P+P+P+P+P+1) == 9

Modified: pypy/trunk/pypy/rpython/memory/gctransform/framework.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gctransform/framework.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gctransform/framework.py	Mon Sep 20 09:48:06 2010
@@ -182,6 +182,7 @@
         gcdata.gc.set_root_walker(root_walker)
         self.num_pushs = 0
         self.write_barrier_calls = 0
+        self.write_barrier_from_array_calls = 0
 
         def frameworkgc_setup():
             # run-time initialization code
@@ -420,6 +421,8 @@
                                             annmodel.SomeInteger(nonneg=True)],
                                            annmodel.s_None)
 
+        self.write_barrier_ptr = None
+        self.write_barrier_from_array_ptr = None
         if GCClass.needs_write_barrier:
             self.write_barrier_ptr = getfn(GCClass.write_barrier.im_func,
                                            [s_gc,
@@ -435,8 +438,26 @@
                                                [annmodel.SomeAddress(),
                                                 annmodel.SomeAddress()],
                                                annmodel.s_None)
-        else:
-            self.write_barrier_ptr = None
+            func = getattr(GCClass, 'write_barrier_from_array', None)
+            if func is not None:
+                self.write_barrier_from_array_ptr = getfn(func.im_func,
+                                           [s_gc,
+                                            annmodel.SomeAddress(),
+                                            annmodel.SomeAddress(),
+                                            annmodel.SomeInteger()],
+                                           annmodel.s_None,
+                                           inline=True)
+                func = getattr(gcdata.gc, 'remember_young_pointer_from_array',
+                               None)
+                if func is not None:
+                    # func should not be a bound method, but a real function
+                    assert isinstance(func, types.FunctionType)
+                    self.write_barrier_from_array_failing_case_ptr = \
+                                             getfn(func,
+                                                   [annmodel.SomeAddress(),
+                                                    annmodel.SomeInteger(),
+                                                    annmodel.SomeAddress()],
+                                                   annmodel.s_None)
         self.statistics_ptr = getfn(GCClass.statistics.im_func,
                                     [s_gc, annmodel.SomeInteger()],
                                     annmodel.SomeInteger())
@@ -523,6 +544,9 @@
         if self.write_barrier_ptr:
             log.info("inserted %s write barrier calls" % (
                          self.write_barrier_calls, ))
+        if self.write_barrier_from_array_ptr:
+            log.info("inserted %s write_barrier_from_array calls" % (
+                         self.write_barrier_from_array_calls, ))
 
         # XXX because we call inputconst already in replace_malloc, we can't
         # modify the instance, we have to modify the 'rtyped instance'
@@ -793,6 +817,12 @@
                   [self.write_barrier_failing_case_ptr],
                   resultvar=op.result)
 
+    def gct_get_write_barrier_from_array_failing_case(self, hop):
+        op = hop.spaceop
+        hop.genop("same_as",
+                  [self.write_barrier_from_array_failing_case_ptr],
+                  resultvar=op.result)
+
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:
             v_ob = hop.spaceop.args[0]
@@ -971,6 +1001,15 @@
         c = rmodel.inputconst(TYPE, lltype.nullptr(TYPE.TO))
         return hop.cast_result(c)
 
+    def _set_into_gc_array_part(self, op):
+        if op.opname == 'setarrayitem':
+            return op.args[1]
+        if op.opname == 'setinteriorfield':
+            for v in op.args[1:-1]:
+                if v.concretetype is not lltype.Void:
+                    return v
+        return None
+
     def transform_generic_set(self, hop):
         from pypy.objspace.flow.model import Constant
         opname = hop.spaceop.opname
@@ -984,15 +1023,26 @@
             and not isinstance(v_newvalue, Constant)
             and v_struct.concretetype.TO._gckind == "gc"
             and hop.spaceop not in self.clean_sets):
-            self.write_barrier_calls += 1
             v_newvalue = hop.genop("cast_ptr_to_adr", [v_newvalue],
                                    resulttype = llmemory.Address)
             v_structaddr = hop.genop("cast_ptr_to_adr", [v_struct],
                                      resulttype = llmemory.Address)
-            hop.genop("direct_call", [self.write_barrier_ptr,
-                                      self.c_const_gc,
-                                      v_newvalue,
-                                      v_structaddr])
+            if (self.write_barrier_from_array_ptr is not None and
+                    self._set_into_gc_array_part(hop.spaceop) is not None):
+                self.write_barrier_from_array_calls += 1
+                v_index = self._set_into_gc_array_part(hop.spaceop)
+                assert v_index.concretetype == lltype.Signed
+                hop.genop("direct_call", [self.write_barrier_from_array_ptr,
+                                          self.c_const_gc,
+                                          v_newvalue,
+                                          v_structaddr,
+                                          v_index])
+            else:
+                self.write_barrier_calls += 1
+                hop.genop("direct_call", [self.write_barrier_ptr,
+                                          self.c_const_gc,
+                                          v_newvalue,
+                                          v_structaddr])
         hop.rename('bare_' + opname)
 
     def transform_getfield_typeptr(self, hop):

Modified: pypy/trunk/pypy/rpython/memory/gcwrapper.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gcwrapper.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gcwrapper.py	Mon Sep 20 09:48:06 2010
@@ -15,6 +15,8 @@
         self.llinterp = llinterp
         self.prepare_graphs(flowgraphs)
         self.gc.setup()
+        self.has_write_barrier_from_array = hasattr(self.gc,
+                                                    'write_barrier_from_array')
 
     def prepare_graphs(self, flowgraphs):
         lltype2vtable = self.llinterp.typer.lltype2vtable
@@ -78,13 +80,30 @@
         ARRAY = lltype.typeOf(array).TO
         addr = llmemory.cast_ptr_to_adr(array)
         addr += llmemory.itemoffsetof(ARRAY, index)
-        self.setinterior(array, addr, ARRAY.OF, newitem)
+        self.setinterior(array, addr, ARRAY.OF, newitem, (index,))
 
-    def setinterior(self, toplevelcontainer, inneraddr, INNERTYPE, newvalue):
+    def setinterior(self, toplevelcontainer, inneraddr, INNERTYPE, newvalue,
+                    offsets=()):
         if (lltype.typeOf(toplevelcontainer).TO._gckind == 'gc' and
             isinstance(INNERTYPE, lltype.Ptr) and INNERTYPE.TO._gckind == 'gc'):
-            self.gc.write_barrier(llmemory.cast_ptr_to_adr(newvalue),
-                                  llmemory.cast_ptr_to_adr(toplevelcontainer))
+            #
+            wb = True
+            if self.has_write_barrier_from_array:
+                for index in offsets:
+                    if type(index) is not str:
+                        assert (type(index) is int    # <- fast path
+                                or lltype.typeOf(index) == lltype.Signed)
+                        self.gc.write_barrier_from_array(
+                            llmemory.cast_ptr_to_adr(newvalue),
+                            llmemory.cast_ptr_to_adr(toplevelcontainer),
+                            index)
+                        wb = False
+                        break
+            #
+            if wb:
+                self.gc.write_barrier(
+                    llmemory.cast_ptr_to_adr(newvalue),
+                    llmemory.cast_ptr_to_adr(toplevelcontainer))
         llheap.setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue)
 
     def collect(self, *gen):

Modified: pypy/trunk/pypy/rpython/memory/test/test_gc.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/test/test_gc.py	(original)
+++ pypy/trunk/pypy/rpython/memory/test/test_gc.py	Mon Sep 20 09:48:06 2010
@@ -770,3 +770,7 @@
     from pypy.rpython.memory.gc.minimark import MiniMarkGC as GCClass
     GC_CAN_SHRINK_BIG_ARRAY = False
     GC_CAN_MALLOC_NONMOVABLE = True
+
+class TestMiniMarkGCCardMarking(TestMiniMarkGC):
+    GC_PARAMS = {'card_page_indices': 4,
+                 'card_page_indices_min': 10}

Modified: pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py	(original)
+++ pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py	Mon Sep 20 09:48:06 2010
@@ -242,6 +242,26 @@
         heap_size = self.heap_usage(statistics)
         assert heap_size < 16000 * WORD / 4 # xxx
 
+    def define_llinterp_dict(self):
+        class A(object):
+            pass
+        def malloc_a_lot():
+            i = 0
+            while i < 10:
+                i += 1
+                a = (1, 2, i)
+                b = {a: A()}
+                j = 0
+                while j < 20:
+                    j += 1
+                    b[1, j, i] = A()
+            return 0
+        return malloc_a_lot
+
+    def test_llinterp_dict(self):
+        run = self.runner("llinterp_dict")
+        run([])
+
     def skipdefine_global_list(cls):
         gl = []
         class Box:
@@ -1454,6 +1474,8 @@
                          'page_size': 16*WORD,
                          'arena_size': 64*WORD,
                          'small_request_threshold': 5*WORD,
+                         'card_page_indices': 4,
+                         'card_page_indices_min': 10,
                          }
             root_stack_depth = 200
 



More information about the Pypy-commit mailing list