[pypy-commit] stmgc c8-card-marking: try using read markers instead of write_locks for cards

Raemi noreply at buildbot.pypy.org
Thu Feb 26 21:46:47 CET 2015


Author: Remi Meier <remi.meier at gmail.com>
Branch: c8-card-marking
Changeset: r1670:9e3927eeccb3
Date: 2015-02-26 20:23 +0100
http://bitbucket.org/pypy/stmgc/changeset/9e3927eeccb3/

Log:	try using read markers instead of write_locks for cards

diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -576,21 +576,67 @@
 }
 
 
-void _stm_write_slowpath_card(object_t *obj, uintptr_t index)
+static bool obj_should_use_cards(object_t *obj)
 {
-    stm_write(obj);
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    long supports = stmcb_obj_supports_cards(realobj);
+    if (!supports)
+        return false;
+
+    /* check also if it makes sense: */
+    size_t size = stmcb_size_rounded_up(realobj);
+    return (size >= _STM_MIN_CARD_OBJ_SIZE);
 }
 
-void _stm_write_slowpath(object_t *obj)
+__attribute__((always_inline))
+static void write_gc_only_path(object_t *obj, bool mark_card)
+{
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(obj->stm_flags & GCFLAG_WB_EXECUTED);
+    dprintf(("write_slowpath-fast(%p)\n", obj));
+
+    if (!mark_card) {
+        /* The basic case, with no card marking.  We append the object
+           into 'objects_pointing_to_nursery', and remove the flag so
+           that the write_slowpath will not be called again until the
+           next minor collection. */
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            /* if we clear this flag, we also need to clear the cards */
+            _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                                obj, CARD_CLEAR, false);
+        }
+        obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+    } else {
+        /* Card marking.  Don't remove GCFLAG_WRITE_BARRIER because we
+           need to come back to _stm_write_slowpath_card() for every
+           card to mark.  Add GCFLAG_CARDS_SET. */
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+        obj->stm_flags |= GCFLAG_CARDS_SET;
+        LIST_APPEND(STM_PSEGMENT->old_objects_with_cards_set, obj);
+    }
+}
+
+
+__attribute__((always_inline))
+static void write_slowpath_common(object_t *obj, bool mark_card)
 {
     assert(_seems_to_be_running_transaction());
     assert(!_is_in_nursery(obj));
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
 
+    if (obj->stm_flags & GCFLAG_WB_EXECUTED) {
+        /* already executed WB once in this transaction. do GC
+           part again: */
+        write_gc_only_path(obj, mark_card);
+        return;
+    }
+
+    char *realobj;
+    size_t obj_size;
     int my_segnum = STM_SEGMENT->segment_num;
     uintptr_t end_page, first_page = ((uintptr_t)obj) / 4096UL;
-    char *realobj;
-    size_t obj_size;
 
     realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
     obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
@@ -604,15 +650,6 @@
     /* add to read set: */
     stm_read(obj);
 
-    if (obj->stm_flags & GCFLAG_WB_EXECUTED) {
-        /* already executed WB once in this transaction. do GC
-           part again: */
-        dprintf(("write_slowpath-fast(%p)\n", obj));
-        obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
-        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
-        return;
-    }
-
     assert(!(obj->stm_flags & GCFLAG_WB_EXECUTED));
     dprintf(("write_slowpath(%p): sz=%lu\n", obj, obj_size));
 
@@ -679,20 +716,105 @@
     }
     OPT_ASSERT(remaining_obj_sz == 0);
 
-    /* remove the WRITE_BARRIER flag and add WB_EXECUTED */
-    obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
-    obj->stm_flags |= GCFLAG_WB_EXECUTED;
+    if (!mark_card) {
+        /* also add it to the GC list for minor collections */
+        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            /* if we clear this flag, we have to tell sync_old_objs that
+               everything needs to be synced */
+            _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                                obj, CARD_MARKED_OLD, true); /* mark all */
+        }
+
+        /* remove the WRITE_BARRIER flag and add WB_EXECUTED */
+        obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+        obj->stm_flags |= GCFLAG_WB_EXECUTED;
+    } else {
+        /* don't remove WRITE_BARRIER, but add CARDS_SET */
+        obj->stm_flags |= (GCFLAG_CARDS_SET | GCFLAG_WB_EXECUTED);
+        /* XXXXXXXXXXXX maybe not set WB_EXECUTED and make CARDS_SET
+           mean the same thing where necessary */
+        LIST_APPEND(STM_PSEGMENT->old_objects_with_cards_set, obj);
+    }
 
     DEBUG_EXPECT_SEGFAULT(true);
 
     release_modification_lock(STM_SEGMENT->segment_num);
     /* done fiddling with protection and privatization */
     release_all_privatization_locks();
+}
 
-    /* also add it to the GC list for minor collections */
-    LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+
+char _stm_write_slowpath_card_extra(object_t *obj)
+{
+    /* the PyPy JIT calls this function directly if it finds that an
+       array doesn't have the GCFLAG_CARDS_SET */
+    bool mark_card = obj_should_use_cards(obj);
+    write_slowpath_common(obj, mark_card);
+    return mark_card;
 }
 
+long _stm_write_slowpath_card_extra_base(void)
+{
+    /* XXX can go away? */
+    /* for the PyPy JIT: _stm_write_slowpath_card_extra_base[obj >> 4]
+       is the byte that must be set to CARD_MARKED.  The logic below
+       does the same, but more explicitly. */
+    return 0;
+}
+
+void _stm_write_slowpath_card(object_t *obj, uintptr_t index)
+{
+    dprintf_test(("write_slowpath_card(%p, %lu)\n",
+                  obj, index));
+
+    /* If CARDS_SET is not set so far, issue a normal write barrier.
+       If the object is large enough, ask it to set up the object for
+       card marking instead. */
+    if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+        char mark_card = _stm_write_slowpath_card_extra(obj);
+        if (!mark_card)
+            return;
+    }
+
+    dprintf_test(("write_slowpath_card %p -> index:%lu\n",
+                  obj, index));
+
+    /* We reach this point if we have to mark the card. */
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(obj->stm_flags & GCFLAG_CARDS_SET);
+    assert(!is_small_uniform(obj)); /* not supported/tested */
+
+#ifndef NDEBUG
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    /* we need at least one read marker for cards in addition to the object's
+       own read marker (cards[0], the one checked/set by stm_read()) */
+    assert(size >= 32);
+    /* the 'index' must be in range(length-of-obj), but we don't have
+       a direct way to know the length.  We know that it is smaller
+       than the size in bytes. */
+    assert(index < size);
+#endif
+
+    /* Write into the card's read marker.  This is used by the next minor
+       collection to know what parts of the big object may have changed.
+       We already own the object here or it is an overflow obj. */
+    struct stm_read_marker_s *cards = get_read_marker(STM_SEGMENT->segment_base,
+                                                      (uintptr_t)obj);
+    cards[get_index_to_card_index(index)].rm = CARD_MARKED;
+
+    dprintf(("mark %p index %lu, card:%lu with %d\n",
+             obj, index, get_index_to_card_index(index), CARD_MARKED));
+}
+
+void _stm_write_slowpath(object_t *obj) {
+    write_slowpath_common(obj,  /* mark_card */ false);
+}
+
+
 static void reset_transaction_read_version(void)
 {
     /* force-reset all read markers to 0 */
@@ -831,7 +953,10 @@
 
     STM_PSEGMENT->safe_point = SP_NO_TRANSACTION;
     STM_PSEGMENT->transaction_state = TS_NONE;
+
+    _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
     list_clear(STM_PSEGMENT->objects_pointing_to_nursery);
+    list_clear(STM_PSEGMENT->old_objects_with_cards_set);
     list_clear(STM_PSEGMENT->new_objects);
 
     release_thread_segment(tl);
@@ -854,12 +979,15 @@
 
 static void push_new_objects_to_other_segments(void)
 {
+    struct stm_priv_segment_info_s *pseg = get_priv_segment(STM_SEGMENT->segment_num);
     acquire_privatization_lock(STM_SEGMENT->segment_num);
     LIST_FOREACH_R(STM_PSEGMENT->new_objects, object_t *,
         ({
             assert(item->stm_flags & GCFLAG_WB_EXECUTED);
+            _cards_cleared_in_object(pseg, item); /* check for C8 */
+            _reset_object_cards(pseg, item, CARD_CLEAR, false); /* unnecessary, as sync_obj_enq does it already? */
             item->stm_flags &= ~GCFLAG_WB_EXECUTED;
-            synchronize_object_enqueue(item);
+            synchronize_object_enqueue(item, true);
         }));
     synchronize_objects_flush();
     release_privatization_lock(STM_SEGMENT->segment_num);
@@ -910,6 +1038,8 @@
         }
     }
 
+    _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
+
     commit_finalizers();
 
     invoke_and_clear_user_callbacks(0);   /* for commit */
@@ -951,6 +1081,10 @@
                undo->backup,
                SLICE_SIZE(undo->slice));
 
+        if (obj_should_use_cards(obj))
+            _reset_object_cards(pseg, obj, CARD_CLEAR, false);
+        /* XXXXXXXXX: only reset cards of slice!! ^^^^^^^ */
+
         dprintf(("reset_modified_from_backup_copies(%d): obj=%p off=%lu bk=%p\n",
                  segment_num, obj, SLICE_OFFSET(undo->slice), undo->backup));
 
@@ -987,9 +1121,23 @@
 
     long bytes_in_nursery = throw_away_nursery(pseg);
 
+    /* some new objects may have cards when aborting, clear them too */
+    LIST_FOREACH_R(pseg->new_objects, object_t * /*item*/,
+        {
+            struct object_s *realobj = (struct object_s *)
+                REAL_ADDRESS(pseg->pub.segment_base, item);
+
+            if (realobj->stm_flags & GCFLAG_CARDS_SET) {
+                /* CARDS_SET is enough since other objs with cards
+                   are already cleared */
+                _reset_object_cards(pseg, item, CARD_CLEAR, false);
+            }
+        });
+
     acquire_modification_lock(segment_num);
     reset_modified_from_backup_copies(segment_num);
     release_modification_lock(segment_num);
+    _verify_cards_cleared_in_all_lists(pseg);
 
     stm_thread_local_t *tl = pseg->pub.running_thread;
 #ifdef STM_NO_AUTOMATIC_SETJMP
@@ -1013,6 +1161,7 @@
     tl->last_abort__bytes_in_nursery = bytes_in_nursery;
 
     list_clear(pseg->objects_pointing_to_nursery);
+    list_clear(pseg->old_objects_with_cards_set);
     list_clear(pseg->new_objects);
     list_clear(pseg->young_weakrefs);
 #pragma pop_macro("STM_SEGMENT")
@@ -1143,7 +1292,133 @@
     ++STM_PSEGMENT->sq_len;
 }
 
-static void synchronize_object_enqueue(object_t *obj)
+
+static void _page_wise_synchronize_object_now(object_t *obj, ssize_t obj_size)
+{
+    uintptr_t start = (uintptr_t)obj;
+    uintptr_t end = start + obj_size;
+
+    do {
+        uintptr_t copy_up_to = (start + 4096) & ~4095;   /* end of page */
+        if (copy_up_to >= end) {
+            copy_up_to = end;        /* this is the last fragment */
+        }
+        uintptr_t copy_size = copy_up_to - start;
+
+        /* double-check that the result fits in one page */
+        assert(copy_size > 0);
+        assert(copy_size + (start & 4095) <= 4096);
+
+        _synchronize_fragment((stm_char *)start, copy_size);
+
+        start = copy_up_to;
+    } while (start != end);
+}
+
+static void _card_wise_synchronize_object_now(object_t *obj, ssize_t obj_size)
+{
+    assert(obj_size >= 32);
+    assert(obj_should_use_cards(obj));
+    assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+
+    uintptr_t offset_itemsize[2];
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+    size_t real_idx_count = (obj_size - offset_itemsize[0]) / offset_itemsize[1];
+
+    struct stm_read_marker_s *cards = get_read_marker(STM_SEGMENT->segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(real_idx_count - 1); /* max valid index */
+    assert(cards->rm == STM_SEGMENT->transaction_read_version); /* stm_read() */
+
+    /* simple heuristic to check if probably the whole object is
+       marked anyway so we should do page-wise synchronize */
+    if (cards[1].rm == CARD_MARKED_OLD
+        && cards[last_card_index].rm == CARD_MARKED_OLD
+        && cards[(last_card_index >> 1) + 1].rm == CARD_MARKED_OLD) {
+
+        dprintf(("card_wise_sync assumes %p,size:%lu is fully marked\n", obj, obj_size));
+        _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                            obj, CARD_CLEAR, false);
+        _page_wise_synchronize_object_now(obj, obj_size);
+        return;
+    }
+
+    dprintf(("card_wise_sync syncs %p,size:%lu card-wise\n", obj, obj_size));
+
+    /* Combine multiple marked cards and do a memcpy for them. We don't
+       try yet to use page_copy() or otherwise take into account privatization
+       of pages (except _has_private_page_in_range) */
+    bool all_cards_were_cleared = true;
+
+    uintptr_t start_card_index = -1;
+    while (card_index <= last_card_index) {
+        uint8_t card_value = cards[card_index].rm;
+
+        if (card_value == CARD_MARKED_OLD) {
+            cards[card_index].rm = CARD_CLEAR;
+
+            if (start_card_index == -1) {   /* first marked card */
+                start_card_index = card_index;
+                /* start = (uintptr_t)obj + stmcb_index_to_byte_offset( */
+                /*     realobj, get_card_index_to_index(card_index)); */
+                if (all_cards_were_cleared) {
+                    all_cards_were_cleared = false;
+                }
+            }
+        }
+        else {
+            OPT_ASSERT(card_value == CARD_CLEAR);
+        }
+
+        if (start_card_index != -1                    /* something to copy */
+            && (card_value != CARD_MARKED_OLD         /* found non-marked card */
+                || card_index == last_card_index)) {  /* this is the last card */
+            /* do the copying: */
+            uintptr_t start, copy_size;
+            uintptr_t next_card_offset;
+            uintptr_t start_card_offset;
+            uintptr_t next_card_index = card_index;
+
+            if (card_value == CARD_MARKED_OLD) {
+                /* card_index is the last card of the object, but we need
+                   to go one further to get the right offset */
+                next_card_index++;
+            }
+
+            start_card_offset = offset_itemsize[0] +
+                get_card_index_to_index(start_card_index) * offset_itemsize[1];
+
+            next_card_offset = offset_itemsize[0] +
+                get_card_index_to_index(next_card_index) * offset_itemsize[1];
+
+            if (next_card_offset > obj_size)
+                next_card_offset = obj_size;
+
+            start = (uintptr_t)obj + start_card_offset;
+            copy_size = next_card_offset - start_card_offset;
+            OPT_ASSERT(copy_size > 0);
+
+            /* push to seg0 and enqueue for synchronization */
+            _synchronize_fragment((stm_char *)start, copy_size);
+
+            start_card_index = -1;
+        }
+
+        card_index++;
+    }
+
+    if (all_cards_were_cleared) {
+        /* well, seems like we never called stm_write_card() on it, so actually
+           we need to fall back to synchronize the whole object */
+        _page_wise_synchronize_object_now(obj, obj_size);
+        return;
+    }
+
+}
+
+
+static void synchronize_object_enqueue(object_t *obj, bool ignore_cards)
 {
     assert(!_is_young(obj));
     assert(STM_PSEGMENT->privatization_lock);
@@ -1155,28 +1430,18 @@
     OPT_ASSERT(obj_size >= 16);
 
     if (LIKELY(is_small_uniform(obj))) {
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
         OPT_ASSERT(obj_size <= GC_LAST_SMALL_SIZE);
         _synchronize_fragment((stm_char *)obj, obj_size);
         return;
+    } else if (ignore_cards || !obj_should_use_cards(obj)) {
+        /* else, a more complicated case for large objects, to copy
+           around data only within the needed pages */
+        _page_wise_synchronize_object_now(obj, obj_size);
+    } else {
+        /* ... or even only cards that need to be updated */
+        _card_wise_synchronize_object_now(obj, obj_size);
     }
-
-    /* else, a more complicated case for large objects, to copy
-       around data only within the needed pages
-    */
-    uintptr_t start = (uintptr_t)obj;
-    uintptr_t end = start + obj_size;
-
-    do {
-        uintptr_t copy_up_to = (start + 4096) & ~4095;   /* end of page */
-        if (copy_up_to >= end) {
-            copy_up_to = end;        /* this is the last fragment */
-        }
-        uintptr_t copy_size = copy_up_to - start;
-
-        _synchronize_fragment((stm_char *)start, copy_size);
-
-        start = copy_up_to;
-    } while (start != end);
 }
 
 static void synchronize_objects_flush(void)
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -34,19 +34,26 @@
 #define FIRST_OLD_RM_PAGE     (OLD_RM_START / 4096UL)
 #define NB_READMARKER_PAGES   (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
 
+#define CARD_SIZE   _STM_CARD_SIZE
+
 enum /* stm_flags */ {
     GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
     GCFLAG_HAS_SHADOW = 0x02,
     GCFLAG_WB_EXECUTED = 0x04,
-    GCFLAG_HAS_CARDS = 0x08,
     GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET,
-    GCFLAG_VISITED = 0x20,
-    GCFLAG_FINALIZATION_ORDERING = 0x40,
+    GCFLAG_VISITED = 0x10,
+    GCFLAG_FINALIZATION_ORDERING = 0x20,
 };
 
+#define SYNC_QUEUE_SIZE    31
 
+enum /* card values in read markers */ {
+    CARD_CLEAR = 0,                 /* card not used at all */
+    CARD_MARKED = _STM_CARD_MARKED, /* card marked for tracing in the next gc */
+    CARD_MARKED_OLD = _STM_CARD_MARKED+1,
+    /* card was marked before, but cleared in a GC */
+};
 
-#define SYNC_QUEUE_SIZE    31
 
 
 /************************************************************/
@@ -196,6 +203,18 @@
 
 #define REAL_ADDRESS(segment_base, src)   ((segment_base) + (uintptr_t)(src))
 
+static inline uintptr_t get_index_to_card_index(uintptr_t index) {
+    return (index / CARD_SIZE) + 1;
+}
+
+static inline uintptr_t get_card_index_to_index(uintptr_t card_index) {
+    return (card_index - 1) * CARD_SIZE;
+}
+
+static inline struct stm_read_marker_s *get_read_marker(char *segment_base, uintptr_t obj)
+{
+   return (struct stm_read_marker_s *)(segment_base + (obj >> 4));
+}
 
 static inline char *get_segment_base(long segment_num) {
     return stm_object_pages + segment_num * (NB_PAGES * 4096UL);
@@ -226,7 +245,7 @@
 static stm_thread_local_t *abort_with_mutex_no_longjmp(void);
 static void abort_data_structures_from_segment_num(int segment_num);
 
-static void synchronize_object_enqueue(object_t *obj);
+static void synchronize_object_enqueue(object_t *obj, bool ignore_cards);
 static void synchronize_objects_flush(void);
 
 static void _signal_handler(int sig, siginfo_t *siginfo, void *context);
diff --git a/c8/stm/gcpage.h b/c8/stm/gcpage.h
--- a/c8/stm/gcpage.h
+++ b/c8/stm/gcpage.h
@@ -7,7 +7,6 @@
 #define GC_MIN                 (NB_NURSERY_PAGES * 4096 * 8)
 #define GC_MAJOR_COLLECT       1.82
 
-
 static struct list_s *testing_prebuilt_objs;
 static char *uninitialized_page_start;   /* within segment 0 */
 static char *uninitialized_page_stop;
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -142,8 +142,138 @@
 
     /* Must trace the object later */
     LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, nobj_sync_now);
+    _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), nobj);
 }
 
+static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj)
+{
+#ifndef NDEBUG
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+
+    if (size < _STM_MIN_CARD_OBJ_SIZE)
+        return;                 /* too small for cards */
+
+    struct stm_read_marker_s *cards = get_read_marker(pseg->pub.segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    while (card_index <= last_card_index) {
+        assert(cards[card_index].rm == CARD_CLEAR);
+        card_index++;
+    }
+
+    assert(!(realobj->stm_flags & GCFLAG_CARDS_SET));
+#endif
+}
+
+static void _verify_cards_cleared_in_all_lists(struct stm_priv_segment_info_s *pseg)
+{
+#ifndef NDEBUG
+    struct list_s *list = pseg->modified_old_objects;
+    struct stm_undo_s *undo = (struct stm_undo_s *)list->items;
+    struct stm_undo_s *end = (struct stm_undo_s *)(list->items + list->count);
+
+    for (; undo < end; undo++) {
+        _cards_cleared_in_object(pseg, undo->object);
+    }
+    LIST_FOREACH_R(
+        pseg->new_objects, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item));
+    LIST_FOREACH_R(
+        pseg->objects_pointing_to_nursery, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item));
+    LIST_FOREACH_R(
+        pseg->old_objects_with_cards_set, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item));
+#endif
+}
+
+static void _reset_object_cards(struct stm_priv_segment_info_s *pseg,
+                                object_t *obj, uint8_t mark_value,
+                                bool mark_all)
+{
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    OPT_ASSERT(size >= _STM_MIN_CARD_OBJ_SIZE);
+
+    uintptr_t offset_itemsize[2];
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+    size = (size - offset_itemsize[0]) / offset_itemsize[1];
+
+    assert(IMPLY(mark_value == CARD_CLEAR, !mark_all)); /* not necessary */
+    assert(IMPLY(mark_all, mark_value == CARD_MARKED_OLD)); /* set *all* to OLD */
+    assert(IMPLY(realobj->stm_flags & GCFLAG_WB_EXECUTED,
+                 mark_value == CARD_CLEAR)); /* overflows are always CLEARed */
+
+
+    struct stm_read_marker_s *cards = get_read_marker(pseg->pub.segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    /* dprintf(("mark cards of %p, size %lu with %d, all: %d\n",
+                obj, size, mark_value, mark_all));
+       dprintf(("obj has %lu cards\n", last_card_index));*/
+    while (card_index <= last_card_index) {
+        if (mark_all || cards[card_index].rm != CARD_CLEAR) {
+            /* dprintf(("mark card %lu,wl:%lu of %p with %d\n", */
+            /*          card_index, card_lock_idx, obj, mark_value)); */
+            cards[card_index].rm = mark_value;
+        }
+        card_index++;
+    }
+
+    realobj->stm_flags &= ~GCFLAG_CARDS_SET;
+
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
+}
+
+
+static void _trace_card_object(object_t *obj)
+{
+    assert(!_is_in_nursery(obj));
+    assert(obj->stm_flags & GCFLAG_CARDS_SET);
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+
+    dprintf(("_trace_card_object(%p)\n", obj));
+    uint8_t mark_value = CARD_MARKED_OLD;
+
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    uintptr_t offset_itemsize[2];
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+    size = (size - offset_itemsize[0]) / offset_itemsize[1];
+
+    struct stm_read_marker_s *cards = get_read_marker(STM_SEGMENT->segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    assert(cards->rm == STM_SEGMENT->transaction_read_version); /* stm_read() */
+
+    /* XXX: merge ranges */
+    while (card_index <= last_card_index) {
+        if (cards[card_index].rm == CARD_MARKED) {
+            /* clear or set to old: */
+            cards[card_index].rm = mark_value;
+
+            uintptr_t start = get_card_index_to_index(card_index);
+            uintptr_t stop = get_card_index_to_index(card_index + 1);
+
+            dprintf(("trace_cards on %p with start:%lu stop:%lu\n",
+                     obj, start, stop));
+            stmcb_trace_cards(realobj, &minor_trace_if_young,
+                              start, stop);
+        }
+
+        card_index++;
+    }
+    obj->stm_flags &= ~GCFLAG_CARDS_SET;
+}
 
 static void collect_roots_in_nursery(void)
 {
@@ -177,15 +307,20 @@
 static inline void _collect_now(object_t *obj)
 {
     assert(!_is_young(obj));
+    assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
 
     //dprintf(("_collect_now: %p\n", obj));
 
-    assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER));
+    if (!(obj->stm_flags & GCFLAG_WRITE_BARRIER)) {
+        /* Trace the 'obj' to replace pointers to nursery with pointers
+           outside the nursery, possibly forcing nursery objects out and
+           adding them to 'objects_pointing_to_nursery' as well. */
+        char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+        stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
 
-    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
-    stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
-
-    obj->stm_flags |= GCFLAG_WRITE_BARRIER;
+        obj->stm_flags |= GCFLAG_WRITE_BARRIER;
+    }
+    /* else traced in collect_cardrefs_to_nursery if necessary */
 }
 
 
@@ -201,18 +336,21 @@
         assert(!_is_in_nursery(obj));
 
         _collect_now(obj);
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
 
         if (obj_sync_now & FLAG_SYNC_LARGE) {
             /* this is a newly allocated obj in this transaction. We must
                either synchronize the object to other segments now, or
                add the object to new_objects list */
-            if (STM_PSEGMENT->minor_collect_will_commit_now) {
-                acquire_privatization_lock(STM_SEGMENT->segment_num);
-                synchronize_object_enqueue(obj);
-                release_privatization_lock(STM_SEGMENT->segment_num);
+            struct stm_priv_segment_info_s *pseg = get_priv_segment(STM_SEGMENT->segment_num);
+            if (pseg->minor_collect_will_commit_now) {
+                acquire_privatization_lock(pseg->pub.segment_num);
+                synchronize_object_enqueue(obj, true); /* ignore cards! */
+                release_privatization_lock(pseg->pub.segment_num);
             } else {
-                LIST_APPEND(STM_PSEGMENT->new_objects, obj);
+                LIST_APPEND(pseg->new_objects, obj);
             }
+            _cards_cleared_in_object(pseg, obj);
         }
 
         /* the list could have moved while appending */
@@ -230,6 +368,30 @@
     }
 }
 
+
+static void collect_cardrefs_to_nursery(void)
+{
+    dprintf(("collect_cardrefs_to_nursery\n"));
+    struct list_s *lst = STM_PSEGMENT->old_objects_with_cards_set;
+
+    while (!list_is_empty(lst)) {
+        object_t *obj = (object_t*)list_pop_item(lst);
+
+        assert(!_is_young(obj));
+
+        if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+            /* sometimes we remove the CARDS_SET in the WB slowpath, see core.c */
+            continue;
+        }
+
+        /* traces cards, clears marked cards or marks them old if necessary */
+        _trace_card_object(obj);
+
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+    }
+}
+
+
 static void collect_objs_still_young_but_with_finalizers(void)
 {
     struct list_s *lst = STM_PSEGMENT->finalizers->objects_with_finalizers;
@@ -314,12 +476,15 @@
 
     STM_PSEGMENT->minor_collect_will_commit_now = commit;
 
+    collect_cardrefs_to_nursery();
+
     collect_roots_in_nursery();
 
     if (STM_PSEGMENT->finalizers != NULL)
         collect_objs_still_young_but_with_finalizers();
 
     collect_oldrefs_to_nursery();
+    assert(list_is_empty(STM_PSEGMENT->old_objects_with_cards_set));
 
     /* now all surviving nursery objects have been moved out */
     acquire_privatization_lock(STM_SEGMENT->segment_num);
diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h
--- a/c8/stm/nursery.h
+++ b/c8/stm/nursery.h
@@ -2,6 +2,11 @@
 #define NSE_SIGPAUSE   _STM_NSE_SIGNAL_MAX
 #define NSE_SIGABORT   _STM_NSE_SIGNAL_ABORT
 
+static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj);
+static void _reset_object_cards(struct stm_priv_segment_info_s *pseg,
+                                object_t *obj, uint8_t mark_value,
+                                bool mark_all);
+
 static void minor_collection(bool commit);
 static void check_nursery_at_transaction_start(void);
 static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg);
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -73,11 +73,16 @@
 } stm_thread_local_t;
 
 #define _STM_GCFLAG_WRITE_BARRIER      0x01
-#define _STM_GCFLAG_CARDS_SET          0x10
 #define _STM_FAST_ALLOC           (66*1024)
 #define _STM_NSE_SIGNAL_ABORT             1
 #define _STM_NSE_SIGNAL_MAX               2
 
+#define _STM_CARD_MARKED 1
+#define _STM_GCFLAG_CARDS_SET          0x8
+#define _STM_CARD_SIZE                 32     /* must be >= 32 */
+#define _STM_MIN_CARD_COUNT            17
+#define _STM_MIN_CARD_OBJ_SIZE         (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
+
 void _stm_write_slowpath(object_t *);
 void _stm_write_slowpath_card(object_t *, uintptr_t);
 object_t *_stm_allocate_slowpath(ssize_t);
@@ -163,6 +168,16 @@
    ranges of indices (using stm_write_card(o, index)) */
 extern void stmcb_trace_cards(struct object_s *, void (object_t **),
                               uintptr_t start, uintptr_t stop);
+/* this function will be called on objects that support cards.
+   It returns the base_offset (in bytes) inside the object from
+   where the indices start, and item_size (in bytes) for the size of
+   one item */
+extern void stmcb_get_card_base_itemsize(struct object_s *,
+                                         uintptr_t offset_itemsize[2]);
+/* returns whether this object supports cards. we will only call
+   stmcb_get_card_base_itemsize on objs that do so. */
+extern long stmcb_obj_supports_cards(struct object_s *);
+
 
 
 
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -10,6 +10,7 @@
 #define STM_NB_SEGMENTS ...
 #define _STM_GCFLAG_WRITE_BARRIER ...
 #define _STM_FAST_ALLOC ...
+#define _STM_CARD_SIZE ...
 
 typedef struct {
 ...;
@@ -332,6 +333,11 @@
     }
 }
 
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+    return 1;
+}
+
 void stmcb_trace_cards(struct object_s *obj, void visit(object_t **),
                        uintptr_t start, uintptr_t stop)
 {
@@ -350,6 +356,19 @@
     }
 }
 
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+                                  uintptr_t offset_itemsize[2])
+{
+    struct myobj_s *myobj = (struct myobj_s*)obj;
+    if (myobj->type_id < 421420) {
+        offset_itemsize[0] = SIZEOF_MYOBJ;
+        offset_itemsize[1] = 1;
+    }
+    else {
+        offset_itemsize[0] = sizeof(struct myobj_s);
+        offset_itemsize[1] = sizeof(object_t *);
+    }
+}
 
 long current_segment_num(void)
 {
@@ -376,6 +395,7 @@
 GCFLAG_WRITE_BARRIER = lib._STM_GCFLAG_WRITE_BARRIER
 NB_SEGMENTS = lib.STM_NB_SEGMENTS
 FAST_ALLOC = lib._STM_FAST_ALLOC
+CARD_SIZE = lib._STM_CARD_SIZE # 16b at least
 
 class Conflict(Exception):
     pass
diff --git a/c8/test/test_card_marking.py b/c8/test/test_card_marking.py
--- a/c8/test/test_card_marking.py
+++ b/c8/test/test_card_marking.py
@@ -42,7 +42,7 @@
 
         stm_write_card(o, 5)
 
-        assert o in old_objects_with_cards()
+        assert o in old_objects_with_cards_set()
         assert o not in modified_old_objects() # overflow object
         assert o not in objects_pointing_to_nursery()
         # don't remove GCFLAG_WB
@@ -141,7 +141,7 @@
 
         assert not modified_old_objects()
         assert not objects_pointing_to_nursery()
-        assert not old_objects_with_cards()
+        assert not old_objects_with_cards_set()
 
         self.start_transaction()
         d = stm_allocate(64)


More information about the pypy-commit mailing list