[pypy-commit] stmgc default: hg merge c8-card-marking

arigo noreply at buildbot.pypy.org
Sat Mar 7 15:27:39 CET 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r1692:21554aee8ae6
Date: 2015-03-07 15:09 +0100
http://bitbucket.org/pypy/stmgc/changeset/21554aee8ae6/

Log:	hg merge c8-card-marking

diff too long, truncating to 2000 out of 2709 lines

diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -228,7 +228,7 @@
         addr >= stm_object_pages+TOTAL_MEMORY) {
         /* actual segfault, unrelated to stmgc */
         fprintf(stderr, "Segmentation fault: accessing %p\n", addr);
-        raise(SIGINT);
+        abort();
     }
 
     int segnum = get_segment_of_linear_address(addr);
@@ -236,7 +236,7 @@
     if (segnum != STM_SEGMENT->segment_num) {
         fprintf(stderr, "Segmentation fault: accessing %p (seg %d) from"
                 " seg %d\n", addr, segnum, STM_SEGMENT->segment_num);
-        raise(SIGINT);
+        abort();
     }
     dprintf(("-> segment: %d\n", segnum));
 
@@ -245,7 +245,7 @@
     if (pagenum < END_NURSERY_PAGE) {
         fprintf(stderr, "Segmentation fault: accessing %p (seg %d "
                         "page %lu)\n", addr, segnum, pagenum);
-        raise(SIGINT);
+        abort();
     }
 
     DEBUG_EXPECT_SEGFAULT(false);
@@ -576,118 +576,422 @@
 }
 
 
-void _stm_write_slowpath(object_t *obj)
+bool obj_should_use_cards(char *seg_base, object_t *obj)
 {
-    assert(_seems_to_be_running_transaction());
-    assert(!_is_in_nursery(obj));
-    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    if (is_small_uniform(obj))
+        return false;
 
-    int my_segnum = STM_SEGMENT->segment_num;
-    uintptr_t end_page, first_page = ((uintptr_t)obj) / 4096UL;
-    char *realobj;
-    size_t obj_size;
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(seg_base, obj);
+    long supports = stmcb_obj_supports_cards(realobj);
+    if (!supports)
+        return false;
 
-    realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
-    obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
-    /* get the last page containing data from the object */
-    if (LIKELY(is_small_uniform(obj))) {
-        end_page = first_page;
-    } else {
-        end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
-    }
+    /* check also if it makes sense: */
+    size_t size = stmcb_size_rounded_up(realobj);
+    return (size >= _STM_MIN_CARD_OBJ_SIZE);
+}
 
-    /* add to read set: */
-    stm_read(obj);
 
-    if (obj->stm_flags & GCFLAG_WB_EXECUTED) {
-        /* already executed WB once in this transaction. do GC
-           part again: */
-        dprintf(("write_slowpath-fast(%p)\n", obj));
-        obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
-        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
-        return;
-    }
-
-    assert(!(obj->stm_flags & GCFLAG_WB_EXECUTED));
-    dprintf(("write_slowpath(%p): sz=%lu\n", obj, obj_size));
-
- retry:
-    /* privatize pages: */
-    /* XXX don't always acquire all locks... */
-    acquire_all_privatization_locks();
+static void make_bk_slices_for_range(
+    object_t *obj,
+    stm_char *start, stm_char *end) /* [start, end[ */
+{
+    dprintf(("make_bk_slices_for_range(%p, %lu, %lu)\n",
+             obj, start - (stm_char*)obj, end - start));
+    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    uintptr_t first_page = ((uintptr_t)start) / 4096UL;
+    uintptr_t end_page = ((uintptr_t)end) / 4096UL;
 
     uintptr_t page;
-    for (page = first_page; page <= end_page; page++) {
-        if (get_page_status_in(my_segnum, page) == PAGE_NO_ACCESS) {
-            /* XXX: slow? */
-            release_all_privatization_locks();
-
-            volatile char *dummy = REAL_ADDRESS(STM_SEGMENT->segment_base, page * 4096UL);
-            *dummy;            /* force segfault */
-
-            goto retry;
-        }
-    }
-    /* all pages are private to us and we hold the privatization_locks so
-       we are allowed to modify them */
-
-    /* phew, now add the obj to the write-set and register the
-       backup copy. */
-    /* XXX: we should not be here at all fiddling with page status
-       if 'obj' is merely an overflow object.  FIX ME, likely by copying
-       the overflow number logic from c7. */
-
-    DEBUG_EXPECT_SEGFAULT(false);
-
-    acquire_modification_lock(STM_SEGMENT->segment_num);
     uintptr_t slice_sz;
-    uintptr_t in_page_offset = (uintptr_t)obj % 4096UL;
-    uintptr_t remaining_obj_sz = obj_size;
-    for (page = first_page; page <= end_page; page++) {
-        /* XXX Maybe also use mprotect() again to mark pages of the object as read-only, and
-           only stick it into modified_old_objects page-by-page?  Maybe it's
-           possible to do card-marking that way, too. */
-        OPT_ASSERT(remaining_obj_sz);
-
+    uintptr_t slice_off = start - (stm_char*)obj;
+    uintptr_t in_page_offset = (uintptr_t)start % 4096UL;
+    uintptr_t remaining_obj_sz = end - start;
+    for (page = first_page; page <= end_page && remaining_obj_sz; page++) {
         slice_sz = remaining_obj_sz;
         if (in_page_offset + slice_sz > 4096UL) {
             /* not over page boundaries */
             slice_sz = 4096UL - in_page_offset;
         }
 
-        size_t slice_off = obj_size - remaining_obj_sz;
+        remaining_obj_sz -= slice_sz;
+        in_page_offset = (in_page_offset + slice_sz) % 4096UL; /* mostly 0 */
 
         /* make backup slice: */
         char *bk_slice = malloc(slice_sz);
         increment_total_allocated(slice_sz);
         memcpy(bk_slice, realobj + slice_off, slice_sz);
 
+        acquire_modification_lock(STM_SEGMENT->segment_num);
         /* !! follows layout of "struct stm_undo_s" !! */
         STM_PSEGMENT->modified_old_objects = list_append3(
             STM_PSEGMENT->modified_old_objects,
             (uintptr_t)obj,     /* obj */
             (uintptr_t)bk_slice,  /* bk_addr */
             NEW_SLICE(slice_off, slice_sz));
+        dprintf(("> append slice %p, off=%lu, sz=%lu\n", bk_slice, slice_off, slice_sz));
+        release_modification_lock(STM_SEGMENT->segment_num);
 
-        remaining_obj_sz -= slice_sz;
-        in_page_offset = (in_page_offset + slice_sz) % 4096UL; /* mostly 0 */
+        slice_off += slice_sz;
     }
-    OPT_ASSERT(remaining_obj_sz == 0);
 
-    /* remove the WRITE_BARRIER flag and add WB_EXECUTED */
-    obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
-    obj->stm_flags |= GCFLAG_WB_EXECUTED;
+}
+
+static void make_bk_slices(object_t *obj,
+                           bool first_call, /* tells us if we also need to make a bk
+                                               of the non-array part of the object */
+                           uintptr_t index,  /* index == -1: all cards, index == -2: no cards */
+                           bool do_missing_cards /* only bk the cards that don't have a bk */
+                           )
+{
+    dprintf(("make_bk_slices(%p, %d, %ld, %d)\n", obj, first_call, index, do_missing_cards));
+    /* do_missing_cards also implies that all cards are cleared at the end */
+    /* index == -1 but not do_missing_cards: bk whole obj */
+    assert(IMPLY(index == -2, first_call && !do_missing_cards));
+    assert(IMPLY(index == -1 && !do_missing_cards, first_call));
+    assert(IMPLY(do_missing_cards, index == -1));
+    assert(IMPLY(is_small_uniform(obj), index == -1 && !do_missing_cards && first_call));
+    assert(IMPLY(first_call, !do_missing_cards));
+    assert(IMPLY(index != -1, obj_should_use_cards(STM_SEGMENT->segment_base, obj)));
+
+    /* get whole card range */
+    struct object_s *realobj = (struct object_s*)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t obj_size = stmcb_size_rounded_up(realobj);
+    uintptr_t offset_itemsize[2] = {-1, -1};
+
+    /* decide where to start copying: */
+    size_t start_offset;
+    if (first_call) {
+        start_offset = 0;
+    } else {
+        start_offset = -1;
+    }
+
+    /* decide if we don't want to look at cards at all: */
+    if ((index == -1 || index == -2) && !do_missing_cards) {
+        assert(first_call);
+        if (index == -1) {
+            /* whole obj */
+            make_bk_slices_for_range(obj, (stm_char*)obj + start_offset,
+                                     (stm_char*)obj + obj_size);
+            if (obj_should_use_cards(STM_SEGMENT->segment_base, obj)) {
+                /* mark whole obj as MARKED_OLD so we don't do bk slices anymore */
+                _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                                    obj, STM_SEGMENT->transaction_read_version,
+                                    true, false);
+            }
+        } else {
+            /* only fixed part */
+            stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+            make_bk_slices_for_range(obj, (stm_char*)obj + start_offset,
+                                     (stm_char*)obj + offset_itemsize[0]);
+        }
+        return;
+    }
+
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+
+    size_t real_idx_count = (obj_size - offset_itemsize[0]) / offset_itemsize[1];
+    assert(IMPLY(index != -1 && index != -2, index >= 0 && index < real_idx_count));
+    struct stm_read_marker_s *cards = get_read_marker(STM_SEGMENT->segment_base, (uintptr_t)obj);
+    uintptr_t last_card_index = get_index_to_card_index(real_idx_count - 1); /* max valid index */
+    uintptr_t card_index;
+
+    /* decide if we want only a specific card: */
+    if (index != -1) {
+        if (start_offset != -1) {
+            /* bk fixed part separately: */
+            make_bk_slices_for_range(obj, (stm_char*)obj + start_offset,
+                                     (stm_char*)obj + offset_itemsize[0]);
+        }
+
+        card_index = get_index_to_card_index(index);
+
+        size_t card_offset = offset_itemsize[0]
+            + get_card_index_to_index(card_index) * offset_itemsize[1];
+        size_t after_card_offset = offset_itemsize[0]
+            + get_card_index_to_index(card_index + 1) * offset_itemsize[1];
+
+        if (after_card_offset > obj_size)
+            after_card_offset = obj_size;
+
+        make_bk_slices_for_range(
+            obj, (stm_char*)obj + card_offset, (stm_char*)obj + after_card_offset);
+
+        return;
+    }
+
+    /* look for CARD_CLEAR or some non-transaction_read_version cards
+       and make bk slices for them */
+    assert(do_missing_cards && index == -1 && start_offset == -1);
+    card_index = 1;
+    uintptr_t start_card_index = -1;
+    while (card_index <= last_card_index) {
+        uint8_t card_value = cards[card_index].rm;
+
+        if (card_value == CARD_CLEAR
+            || (card_value != CARD_MARKED
+                && card_value < STM_SEGMENT->transaction_read_version)) {
+            /* we need a backup of this card */
+            if (start_card_index == -1) {   /* first unmarked card */
+                start_card_index = card_index;
+            }
+        } else {
+            /* "CARD_MARKED_OLD" or CARD_MARKED */
+            OPT_ASSERT(card_value == STM_SEGMENT->transaction_read_version
+                       || card_value == CARD_MARKED);
+        }
+        /* in any case, remember that we already made a bk slice for this
+           card, so set to "MARKED_OLD": */
+        cards[card_index].rm = STM_SEGMENT->transaction_read_version;
+
+
+        if (start_card_index != -1                    /* something to copy */
+            && (card_value == CARD_MARKED             /* found marked card */
+                || card_value == STM_SEGMENT->transaction_read_version/* old marked */
+                || card_index == last_card_index)) {  /* this is the last card */
+
+            /* do the bk slice: */
+            uintptr_t copy_size;
+            uintptr_t next_card_offset;
+            uintptr_t start_card_offset;
+            uintptr_t next_card_index = card_index;
+
+            if (card_value == CARD_CLEAR
+                || (card_value != CARD_MARKED
+                    && card_value < STM_SEGMENT->transaction_read_version)) {
+                /* this was actually the last card which wasn't set, but we
+                   need to go one further to get the right offset */
+                next_card_index++;
+            }
+
+            start_card_offset = offset_itemsize[0] +
+                get_card_index_to_index(start_card_index) * offset_itemsize[1];
+
+            next_card_offset = offset_itemsize[0] +
+                get_card_index_to_index(next_card_index) * offset_itemsize[1];
+
+            if (next_card_offset > obj_size)
+                next_card_offset = obj_size;
+
+            copy_size = next_card_offset - start_card_offset;
+            OPT_ASSERT(copy_size > 0);
+
+            /* add the slices: */
+            make_bk_slices_for_range(
+                obj, (stm_char*)obj + start_card_offset,
+                (stm_char*)obj + next_card_offset);
+
+            start_card_index = -1;
+        }
+
+        card_index++;
+    }
+
+    obj->stm_flags &= ~GCFLAG_CARDS_SET;
+    _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), obj, false);
+}
+
+__attribute__((always_inline))
+static void write_slowpath_overflow_obj(object_t *obj, bool mark_card)
+{
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(!(obj->stm_flags & GCFLAG_WB_EXECUTED));
+    dprintf(("write_slowpath_overflow_obj(%p)\n", obj));
+
+    if (!mark_card) {
+        /* The basic case, with no card marking.  We append the object
+           into 'objects_pointing_to_nursery', and remove the flag so
+           that the write_slowpath will not be called again until the
+           next minor collection. */
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            /* if we clear this flag, we also need to clear the cards.
+               bk_slices are not needed as this is an overflow object */
+            _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                                obj, CARD_CLEAR, false, false);
+        }
+        obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+    } else {
+        /* Card marking.  Don't remove GCFLAG_WRITE_BARRIER because we
+           need to come back to _stm_write_slowpath_card() for every
+           card to mark.  Add GCFLAG_CARDS_SET.
+           again, we don't need bk_slices as this is an overflow obj */
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+        obj->stm_flags |= GCFLAG_CARDS_SET;
+        LIST_APPEND(STM_PSEGMENT->old_objects_with_cards_set, obj);
+    }
+}
+
+
+__attribute__((always_inline))
+static void write_slowpath_common(object_t *obj, bool mark_card)
+{
+    assert(_seems_to_be_running_transaction());
+    assert(!_is_in_nursery(obj));
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+
+    if (IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) {
+        /* overflow obj from the current transaction: no backup
+           slices needed, only the GC part is redone: */
+        assert(!(obj->stm_flags & GCFLAG_WB_EXECUTED));
+        write_slowpath_overflow_obj(obj, mark_card);
+        return;
+    }
+
+    dprintf(("write_slowpath(%p)\n", obj));
+
+    /* add to read set: */
+    stm_read(obj);
+
+    if (!(obj->stm_flags & GCFLAG_WB_EXECUTED)) {
+        /* the first time we write this obj, make sure it is fully
+           accessible, as major gc may depend on being able to trace
+           the full obj in this segment (XXX) */
+        char *realobj;
+        size_t obj_size;
+        int my_segnum = STM_SEGMENT->segment_num;
+        uintptr_t end_page, first_page = ((uintptr_t)obj) / 4096UL;
+
+        realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+        obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
+        /* get the last page containing data from the object */
+        if (LIKELY(is_small_uniform(obj))) {
+            end_page = first_page;
+        } else {
+            end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
+        }
+
+        acquire_privatization_lock(STM_SEGMENT->segment_num);
+        uintptr_t page;
+        for (page = first_page; page <= end_page; page++) {
+            if (get_page_status_in(my_segnum, page) == PAGE_NO_ACCESS) {
+                release_privatization_lock(STM_SEGMENT->segment_num);
+                volatile char *dummy = REAL_ADDRESS(STM_SEGMENT->segment_base, page * 4096UL);
+                *dummy;            /* force segfault */
+                acquire_privatization_lock(STM_SEGMENT->segment_num);
+            }
+        }
+        release_privatization_lock(STM_SEGMENT->segment_num);
+    }
+
+    if (mark_card) {
+        if (!(obj->stm_flags & GCFLAG_WB_EXECUTED)) {
+            make_bk_slices(obj,
+                           true,        /* first_call */
+                           -2,          /* index: backup only fixed part */
+                           false);      /* do_missing_cards */
+        }
+
+        DEBUG_EXPECT_SEGFAULT(false);
+
+        /* don't remove WRITE_BARRIER, but add CARDS_SET */
+        obj->stm_flags |= (GCFLAG_CARDS_SET | GCFLAG_WB_EXECUTED);
+        LIST_APPEND(STM_PSEGMENT->old_objects_with_cards_set, obj);
+    } else {
+        /* called if WB_EXECUTED is set or this is the first time
+           for this obj: */
+
+        /* add it to the GC list for minor collections */
+        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            assert(obj->stm_flags & GCFLAG_WB_EXECUTED);
+
+            /* not the first call to the WB for this obj: the
+               then-part above was executed before.  Since we clear
+               GCFLAG_CARDS_SET here, we must add all the bk
+               slices that are still missing */
+            make_bk_slices(obj,
+                           false,       /* first_call */
+                           -1,          /* index: whole obj */
+                           true);       /* do_missing_cards */
+
+        } else if (!(obj->stm_flags & GCFLAG_WB_EXECUTED)) {
+            /* first and only time we enter here: */
+            make_bk_slices(obj,
+                           true,        /* first_call */
+                           -1,          /* index: whole obj */
+                           false);      /* do_missing_cards */
+        }
+
+        DEBUG_EXPECT_SEGFAULT(false);
+        /* remove the WRITE_BARRIER flag and add WB_EXECUTED */
+        obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+        obj->stm_flags |= GCFLAG_WB_EXECUTED;
+    }
 
     DEBUG_EXPECT_SEGFAULT(true);
+}
 
-    release_modification_lock(STM_SEGMENT->segment_num);
-    /* done fiddling with protection and privatization */
-    release_all_privatization_locks();
 
-    /* also add it to the GC list for minor collections */
-    LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+void _stm_write_slowpath_card(object_t *obj, uintptr_t index)
+{
+    dprintf_test(("write_slowpath_card(%p, %lu)\n",
+                  obj, index));
+
+    /* If CARDS_SET is not set so far, issue a normal write barrier;
+       if the object is large enough, that barrier sets the object
+       up for card marking instead. */
+    if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+        bool mark_card = obj_should_use_cards(STM_SEGMENT->segment_base, obj);
+        write_slowpath_common(obj, mark_card);
+        if (!mark_card)
+            return;
+    }
+
+    assert(obj_should_use_cards(STM_SEGMENT->segment_base, obj));
+    dprintf_test(("write_slowpath_card %p -> index:%lu\n",
+                  obj, index));
+
+    /* We reach this point if we have to mark the card. */
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(obj->stm_flags & GCFLAG_CARDS_SET);
+    assert(!is_small_uniform(obj)); /* not supported/tested */
+
+#ifndef NDEBUG
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    /* we need at least one read marker in addition to the STM-reserved object
+       write-lock */
+    assert(size >= 32);
+    /* the 'index' must be in range(length-of-obj), but we don't have
+       a direct way to know the length.  We know that it is smaller
+       than the size in bytes. */
+    assert(index < size);
+    /* this object was allocated with allocate_outside_nursery_large(),
+       which returns addresses aligned to 16 bytes */
+    assert((((uintptr_t)obj) & 15) == 0);
+#endif
+
+    /* Write into the card's lock.  This is used by the next minor
+       collection to know what parts of the big object may have changed.
+       We already own the object here or it is an overflow obj. */
+    stm_read_marker_t *card = (stm_read_marker_t *)(((uintptr_t)obj) >> 4);
+    card += get_index_to_card_index(index);
+
+    if (!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)
+        && !(card->rm == CARD_MARKED
+             || card->rm == STM_SEGMENT->transaction_read_version)) {
+        /* need to do the backup slice of the card */
+        make_bk_slices(obj,
+                       false,       /* first_call */
+                       index,       /* index: only 1 card */
+                       false);      /* do_missing_cards */
+    }
+    card->rm = CARD_MARKED;
+
+    dprintf(("mark %p index %lu, card:%lu with %d\n",
+             obj, index, get_index_to_card_index(index), CARD_MARKED));
 }
 
+void _stm_write_slowpath(object_t *obj)
+{
+    write_slowpath_common(obj,  /* mark_card */ false);
+}
+
+
 static void reset_transaction_read_version(void)
 {
     /* force-reset all read markers to 0 */
@@ -705,7 +1009,8 @@
 #endif
         memset(readmarkers, 0, NB_READMARKER_PAGES * 4096UL);
     }
-    STM_SEGMENT->transaction_read_version = 1;
+    STM_SEGMENT->transaction_read_version = 2;
+    assert(STM_SEGMENT->transaction_read_version > _STM_CARD_MARKED);
 }
 
 static void reset_wb_executed_flags(void)
@@ -766,7 +1071,7 @@
     }
 
     assert(list_is_empty(STM_PSEGMENT->modified_old_objects));
-    assert(list_is_empty(STM_PSEGMENT->new_objects));
+    assert(list_is_empty(STM_PSEGMENT->large_overflow_objects));
     assert(list_is_empty(STM_PSEGMENT->objects_pointing_to_nursery));
     assert(list_is_empty(STM_PSEGMENT->young_weakrefs));
     assert(tree_is_cleared(STM_PSEGMENT->young_outside_nursery));
@@ -826,8 +1131,11 @@
 
     STM_PSEGMENT->safe_point = SP_NO_TRANSACTION;
     STM_PSEGMENT->transaction_state = TS_NONE;
+
+    _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
     list_clear(STM_PSEGMENT->objects_pointing_to_nursery);
-    list_clear(STM_PSEGMENT->new_objects);
+    list_clear(STM_PSEGMENT->old_objects_with_cards_set);
+    list_clear(STM_PSEGMENT->large_overflow_objects);
 
     release_thread_segment(tl);
     /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */
@@ -847,13 +1155,16 @@
 #endif
 }
 
-static void push_new_objects_to_other_segments(void)
+static void push_large_overflow_objects_to_other_segments(void)
 {
+    if (list_is_empty(STM_PSEGMENT->large_overflow_objects))
+        return;
+
+    /* XXX: also pushes small ones right now */
     acquire_privatization_lock(STM_SEGMENT->segment_num);
-    LIST_FOREACH_R(STM_PSEGMENT->new_objects, object_t *,
+    LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
         ({
-            assert(item->stm_flags & GCFLAG_WB_EXECUTED);
-            item->stm_flags &= ~GCFLAG_WB_EXECUTED;
+            assert(!(item->stm_flags & GCFLAG_WB_EXECUTED));
             synchronize_object_enqueue(item);
         }));
     synchronize_objects_flush();
@@ -867,7 +1178,7 @@
        in handle_segfault_in_page() that also copies
        unknown-to-the-segment/uncommitted things.
     */
-    list_clear(STM_PSEGMENT->new_objects);
+    list_clear(STM_PSEGMENT->large_overflow_objects);
 }
 
 
@@ -882,20 +1193,32 @@
     dprintf(("> stm_commit_transaction()\n"));
     minor_collection(1);
 
-    push_new_objects_to_other_segments();
+    push_large_overflow_objects_to_other_segments();
     /* push before validate. otherwise they are reachable too early */
     bool was_inev = STM_PSEGMENT->transaction_state == TS_INEVITABLE;
     _validate_and_add_to_commit_log();
 
+    stm_rewind_jmp_forget(STM_SEGMENT->running_thread);
 
     /* XXX do we still need a s_mutex_lock() section here? */
     s_mutex_lock();
+    commit_finalizers();
 
+    /* update 'overflow_number' if needed */
+    if (STM_PSEGMENT->overflow_number_has_been_used) {
+        highest_overflow_number += GCFLAG_OVERFLOW_NUMBER_bit0;
+        assert(highest_overflow_number !=        /* XXX else, overflow! */
+               (uint32_t)-GCFLAG_OVERFLOW_NUMBER_bit0);
+        STM_PSEGMENT->overflow_number = highest_overflow_number;
+        STM_PSEGMENT->overflow_number_has_been_used = false;
+    }
+
+    invoke_and_clear_user_callbacks(0);   /* for commit */
+
+    /* >>>>> there may be a FORK() happening in the safepoint below <<<<<*/
     enter_safe_point_if_requested();
     assert(STM_SEGMENT->nursery_end == NURSERY_END);
 
-    stm_rewind_jmp_forget(STM_SEGMENT->running_thread);
-
     /* if a major collection is required, do it here */
     if (is_major_collection_requested()) {
         synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK);
@@ -905,9 +1228,7 @@
         }
     }
 
-    commit_finalizers();
-
-    invoke_and_clear_user_callbacks(0);   /* for commit */
+    _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
 
     if (globally_unique_transaction && was_inev) {
         committed_globally_unique_transaction();
@@ -946,8 +1267,9 @@
                undo->backup,
                SLICE_SIZE(undo->slice));
 
-        dprintf(("reset_modified_from_backup_copies(%d): obj=%p off=%lu bk=%p\n",
-                 segment_num, obj, SLICE_OFFSET(undo->slice), undo->backup));
+        dprintf(("reset_modified_from_backup_copies(%d): obj=%p off=%lu sz=%d bk=%p\n",
+                 segment_num, obj, SLICE_OFFSET(undo->slice),
+                 SLICE_SIZE(undo->slice), undo->backup));
 
         free_bk(undo);
     }
@@ -982,9 +1304,18 @@
 
     long bytes_in_nursery = throw_away_nursery(pseg);
 
+    /* clear CARD_MARKED on objs (don't care about CARD_MARKED_OLD) */
+    LIST_FOREACH_R(pseg->old_objects_with_cards_set, object_t * /*item*/,
+        {
+            /* CARDS_SET may have already been lost because stm_validate()
+               may call reset_modified_from_backup_copies() */
+            _reset_object_cards(pseg, item, CARD_CLEAR, false, false);
+        });
+
     acquire_modification_lock(segment_num);
     reset_modified_from_backup_copies(segment_num);
     release_modification_lock(segment_num);
+    _verify_cards_cleared_in_all_lists(pseg);
 
     stm_thread_local_t *tl = pseg->pub.running_thread;
 #ifdef STM_NO_AUTOMATIC_SETJMP
@@ -1008,7 +1339,8 @@
     tl->last_abort__bytes_in_nursery = bytes_in_nursery;
 
     list_clear(pseg->objects_pointing_to_nursery);
-    list_clear(pseg->new_objects);
+    list_clear(pseg->old_objects_with_cards_set);
+    list_clear(pseg->large_overflow_objects);
     list_clear(pseg->young_weakrefs);
 #pragma pop_macro("STM_SEGMENT")
 #pragma pop_macro("STM_PSEGMENT")
@@ -1138,6 +1470,8 @@
     ++STM_PSEGMENT->sq_len;
 }
 
+
+
 static void synchronize_object_enqueue(object_t *obj)
 {
     assert(!_is_young(obj));
@@ -1150,14 +1484,14 @@
     OPT_ASSERT(obj_size >= 16);
 
     if (LIKELY(is_small_uniform(obj))) {
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
         OPT_ASSERT(obj_size <= GC_LAST_SMALL_SIZE);
         _synchronize_fragment((stm_char *)obj, obj_size);
         return;
     }
 
     /* else, a more complicated case for large objects, to copy
-       around data only within the needed pages
-    */
+       around data only within the needed pages */
     uintptr_t start = (uintptr_t)obj;
     uintptr_t end = start + obj_size;
 
@@ -1168,6 +1502,10 @@
         }
         uintptr_t copy_size = copy_up_to - start;
 
+        /* double-check that the result fits in one page */
+        assert(copy_size > 0);
+        assert(copy_size + (start & 4095) <= 4096);
+
         _synchronize_fragment((stm_char *)start, copy_size);
 
         start = copy_up_to;
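
Note on the slicing above: the backup copies made by
make_bk_slices_for_range() never cross a page boundary; the loop cuts
the range [start, end) at every 4096-byte page edge, so each bk slice
lives entirely within one page.  A minimal standalone sketch of just
that arithmetic (demo_slice_range is a name invented for this note,
not part of stmgc):

    #include <stdio.h>
    #include <stdint.h>

    /* Split [start, end) into slices that never cross a 4096-byte
       page boundary, mirroring the loop in
       make_bk_slices_for_range(). */
    static void demo_slice_range(uintptr_t start, uintptr_t end)
    {
        uintptr_t in_page_offset = start % 4096UL;
        uintptr_t remaining = end - start;
        uintptr_t off = 0;
        while (remaining) {
            uintptr_t slice_sz = remaining;
            if (in_page_offset + slice_sz > 4096UL)
                slice_sz = 4096UL - in_page_offset;  /* stop at page edge */
            printf("slice off=%lu sz=%lu\n",
                   (unsigned long)off, (unsigned long)slice_sz);
            remaining -= slice_sz;
            in_page_offset = (in_page_offset + slice_sz) % 4096UL;
            off += slice_sz;
        }
    }

    int main(void)
    {
        /* a 6000-byte range starting 1000 bytes into a page is cut
           into two slices of 3096 and 2904 bytes */
        demo_slice_range(1000, 7000);
        return 0;
    }
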
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -34,17 +34,34 @@
 #define FIRST_OLD_RM_PAGE     (OLD_RM_START / 4096UL)
 #define NB_READMARKER_PAGES   (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
 
+#define CARD_SIZE   _STM_CARD_SIZE
+
 enum /* stm_flags */ {
     GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
     GCFLAG_HAS_SHADOW = 0x02,
     GCFLAG_WB_EXECUTED = 0x04,
-    GCFLAG_VISITED = 0x08,
-    GCFLAG_FINALIZATION_ORDERING = 0x10,
+    GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET,
+    GCFLAG_VISITED = 0x10,
+    GCFLAG_FINALIZATION_ORDERING = 0x20,
+    /* All remaining bits of the 32-bit 'stm_flags' field are taken by
+       the "overflow number".  This is a number that identifies the
+       "overflow objects" from the current transaction among all old
+       objects.  More precisely, overflow objects are objects from the
+       current transaction that have been flushed out of the nursery,
+       which occurs if the same transaction allocates too many objects.
+    */
+    GCFLAG_OVERFLOW_NUMBER_bit0 = 0x40   /* must be last */
 };
 
+#define SYNC_QUEUE_SIZE    31
 
+enum /* card values in read markers */ {
+    CARD_CLEAR = 0,                 /* card not used at all */
+    CARD_MARKED = _STM_CARD_MARKED, /* card marked for tracing in the next gc */
+    /* CARD_MARKED_OLD = STM_PSEGMENT->transaction_read_version, */
+    /* card was marked before, but cleared in a GC */
+};
 
-#define SYNC_QUEUE_SIZE    31
 
 
 /************************************************************/
@@ -72,6 +89,7 @@
     struct list_s *modified_old_objects;
 
     struct list_s *objects_pointing_to_nursery;
+    struct list_s *old_objects_with_cards_set;
     struct tree_s *young_outside_nursery;
     struct tree_s *nursery_objects_shadows;
 
@@ -88,8 +106,9 @@
     /* list of objects created in the current transaction and
        that survived at least one minor collection. They need
        to be synchronized to other segments on commit, but they
-       do not need to be in the commit log entry. */
-    struct list_s *new_objects;
+       do not need to be in the commit log entry.
+       XXX: for now it also contains small overflow objs */
+    struct list_s *large_overflow_objects;
 
     uint8_t privatization_lock;  // XXX KILL
 
@@ -101,6 +120,14 @@
 
     struct tree_s *callbacks_on_commit_and_abort[2];
 
+    /* This is the number stored in the overflowed objects (a multiple of
+       GCFLAG_OVERFLOW_NUMBER_bit0).  It is incremented when the
+       transaction is done, but only if we actually overflowed any
+       object; otherwise, no object has got this number. */
+    uint32_t overflow_number;
+    bool overflow_number_has_been_used;
+
+
     struct stm_commit_log_entry_s *last_commit_log_entry;
 
     struct stm_shadowentry_s *shadowstack_at_start_of_transaction;
@@ -193,6 +220,21 @@
 
 #define REAL_ADDRESS(segment_base, src)   ((segment_base) + (uintptr_t)(src))
 
+#define IS_OVERFLOW_OBJ(pseg, obj) (((obj)->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) \
+                                    == (pseg)->overflow_number)
+
+static inline uintptr_t get_index_to_card_index(uintptr_t index) {
+    return (index / CARD_SIZE) + 1;
+}
+
+static inline uintptr_t get_card_index_to_index(uintptr_t card_index) {
+    return (card_index - 1) * CARD_SIZE;
+}
+
+static inline struct stm_read_marker_s *get_read_marker(char *segment_base, uintptr_t obj)
+{
+   return (struct stm_read_marker_s *)(segment_base + (obj >> 4));
+}
 
 static inline char *get_segment_base(long segment_num) {
     return stm_object_pages + segment_num * (NB_PAGES * 4096UL);
@@ -215,6 +257,7 @@
     return (addr - stm_object_pages) / (NB_PAGES * 4096UL);
 }
 
+bool obj_should_use_cards(char *seg_base, object_t *obj);
 
 static bool _is_tl_registered(stm_thread_local_t *tl);
 static bool _seems_to_be_running_transaction(void);
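
The helpers added at the end of core.h encode the card layout: the
read marker of an object at address p lives at p >> 4 (one marker
byte per 16 bytes of address space), and the cards of a large object
are simply the marker bytes that follow, one card per CARD_SIZE
indices.  Card index 0 is left unused because cards[0] is the
object's own read marker.  A small self-contained check of the round
trip; the CARD_SIZE of 32 is illustrative only, since the real value
of _STM_CARD_SIZE does not appear in this diff:

    #include <assert.h>
    #include <stdint.h>

    #define CARD_SIZE 32   /* demo value, not the real _STM_CARD_SIZE */

    static uintptr_t get_index_to_card_index(uintptr_t index) {
        return (index / CARD_SIZE) + 1;    /* card 0 is never used */
    }

    static uintptr_t get_card_index_to_index(uintptr_t card_index) {
        return (card_index - 1) * CARD_SIZE;
    }

    int main(void) {
        /* items 0..31 share card 1, items 32..63 share card 2, ... */
        assert(get_index_to_card_index(0) == 1);
        assert(get_index_to_card_index(31) == 1);
        assert(get_index_to_card_index(32) == 2);
        /* the inverse returns the first item covered by a card */
        assert(get_card_index_to_index(2) == 32);
        return 0;
    }
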
diff --git a/c8/stm/finalizer.c b/c8/stm/finalizer.c
--- a/c8/stm/finalizer.c
+++ b/c8/stm/finalizer.c
@@ -98,14 +98,14 @@
         list_clear(lst);
     }
 
-    /* also deals with newly created objects: they are at the tail of
+    /* also deals with overflow objects: they are at the tail of
        old_objects_with_light_finalizers (this list is kept in order
        and we cannot add any already-committed object) */
     lst = pseg->old_objects_with_light_finalizers;
     count = list_count(lst);
     while (count > 0) {
         object_t *obj = (object_t *)list_item(lst, --count);
-        if (!(obj->stm_flags & GCFLAG_WB_EXECUTED))
+        if (!IS_OVERFLOW_OBJ(pseg, obj))
             break;
         lst->count = count;
         if (must_fix_gs) {
@@ -264,11 +264,14 @@
         LIST_APPEND(_finalizer_tmpstack, obj);
 }
 
-static inline struct list_s *finalizer_trace(char *base, object_t *obj,
-                                             struct list_s *lst)
+static inline struct list_s *finalizer_trace(
+    struct stm_priv_segment_info_s *pseg, object_t *obj, struct list_s *lst)
 {
-    if (!is_new_object(obj))
+    char *base;
+    if (!is_overflow_obj_safe(pseg, obj))
         base = stm_object_pages;
+    else
+        base = pseg->pub.segment_base;
 
     struct object_s *realobj = (struct object_s *)REAL_ADDRESS(base, obj);
     _finalizer_tmpstack = lst;
@@ -277,7 +280,8 @@
 }
 
 
-static void _recursively_bump_finalization_state_from_2_to_3(char *base, object_t *obj)
+static void _recursively_bump_finalization_state_from_2_to_3(
+    struct stm_priv_segment_info_s *pseg, object_t *obj)
 {
     assert(_finalization_state(obj) == 2);
     struct list_s *tmpstack = _finalizer_emptystack;
@@ -289,7 +293,7 @@
             realobj->stm_flags &= ~GCFLAG_FINALIZATION_ORDERING;
 
             /* trace */
-            tmpstack = finalizer_trace(base, obj, tmpstack);
+            tmpstack = finalizer_trace(pseg, obj, tmpstack);
         }
 
         if (list_is_empty(tmpstack))
@@ -300,14 +304,16 @@
     _finalizer_emptystack = tmpstack;
 }
 
-static void _recursively_bump_finalization_state_from_1_to_2(char *base, object_t *obj)
+static void _recursively_bump_finalization_state_from_1_to_2(
+    struct stm_priv_segment_info_s *pseg, object_t *obj)
 {
     assert(_finalization_state(obj) == 1);
     /* The call will add GCFLAG_VISITED recursively, thus bump state 1->2 */
-    mark_visit_possibly_new_object(base, obj);
+    mark_visit_possibly_new_object(obj, pseg);
 }
 
-static struct list_s *mark_finalize_step1(char *base, struct finalizers_s *f)
+static struct list_s *mark_finalize_step1(
+    struct stm_priv_segment_info_s *pseg, struct finalizers_s *f)
 {
     if (f == NULL)
         return NULL;
@@ -336,21 +342,22 @@
             int state = _finalization_state(y);
             if (state <= 0) {
                 _bump_finalization_state_from_0_to_1(y);
-                pending = finalizer_trace(base, y, pending);
+                pending = finalizer_trace(pseg, y, pending);
             }
             else if (state == 2) {
-                _recursively_bump_finalization_state_from_2_to_3(base, y);
+                _recursively_bump_finalization_state_from_2_to_3(pseg, y);
             }
         }
         _finalizer_pending = pending;
         assert(_finalization_state(x) == 1);
-        _recursively_bump_finalization_state_from_1_to_2(base, x);
+        _recursively_bump_finalization_state_from_1_to_2(pseg, x);
     }
     return marked;
 }
 
-static void mark_finalize_step2(char *base, struct finalizers_s *f,
-                                struct list_s *marked)
+static void mark_finalize_step2(
+    struct stm_priv_segment_info_s *pseg, struct finalizers_s *f,
+    struct list_s *marked)
 {
     if (f == NULL)
         return;
@@ -367,7 +374,7 @@
             if (run_finalizers == NULL)
                 run_finalizers = list_create();
             LIST_APPEND(run_finalizers, x);
-            _recursively_bump_finalization_state_from_2_to_3(base, x);
+            _recursively_bump_finalization_state_from_2_to_3(pseg, x);
         }
         else {
             struct list_s *lst = f->objects_with_finalizers;
@@ -403,29 +410,28 @@
     long j;
     for (j = 1; j < NB_SEGMENTS; j++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(j);
-        marked_seg[j] = mark_finalize_step1(pseg->pub.segment_base,
-                                            pseg->finalizers);
+        marked_seg[j] = mark_finalize_step1(pseg, pseg->finalizers);
     }
-    marked_seg[0] = mark_finalize_step1(stm_object_pages, &g_finalizers);
+    marked_seg[0] = mark_finalize_step1(get_priv_segment(0), &g_finalizers);
 
     LIST_FREE(_finalizer_pending);
 
     for (j = 1; j < NB_SEGMENTS; j++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(j);
-        mark_finalize_step2(pseg->pub.segment_base, pseg->finalizers,
-                            marked_seg[j]);
+        mark_finalize_step2(pseg, pseg->finalizers, marked_seg[j]);
     }
-    mark_finalize_step2(stm_object_pages, &g_finalizers, marked_seg[0]);
+    mark_finalize_step2(get_priv_segment(0), &g_finalizers, marked_seg[0]);
 
     LIST_FREE(_finalizer_emptystack);
 }
 
-static void mark_visit_from_finalizer1(char *base, struct finalizers_s *f)
+static void mark_visit_from_finalizer1(
+    struct stm_priv_segment_info_s *pseg, struct finalizers_s *f)
 {
     if (f != NULL && f->run_finalizers != NULL) {
         LIST_FOREACH_R(f->run_finalizers, object_t * /*item*/,
                        ({
-                           mark_visit_possibly_new_object(base, item);
+                           mark_visit_possibly_new_object(item, pseg);
                        }));
     }
 }
@@ -435,9 +441,9 @@
     long j;
     for (j = 1; j < NB_SEGMENTS; j++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(j);
-        mark_visit_from_finalizer1(pseg->pub.segment_base, pseg->finalizers);
+        mark_visit_from_finalizer1(pseg, pseg->finalizers);
     }
-    mark_visit_from_finalizer1(stm_object_pages, &g_finalizers);
+    mark_visit_from_finalizer1(get_priv_segment(0), &g_finalizers);
 }
 
 static void _execute_finalizers(struct finalizers_s *f)
diff --git a/c8/stm/forksupport.c b/c8/stm/forksupport.c
--- a/c8/stm/forksupport.c
+++ b/c8/stm/forksupport.c
@@ -84,11 +84,23 @@
     stm_thread_local_t *tl = pr->pub.running_thread;
     dprintf(("forksupport_child: abort in seg%ld\n", i));
     assert(tl->associated_segment_num == i);
-    assert(pr->transaction_state == TS_REGULAR);
+    assert(pr->transaction_state != TS_INEVITABLE);
     set_gs_register(get_segment_base(i));
     assert(STM_SEGMENT->segment_num == i);
 
     s_mutex_lock();
+    if (pr->transaction_state == TS_NONE) {
+        /* just committed, TS_NONE but still has running_thread */
+
+        /* do _finish_transaction() */
+        STM_PSEGMENT->safe_point = SP_NO_TRANSACTION;
+        list_clear(STM_PSEGMENT->objects_pointing_to_nursery);
+        list_clear(STM_PSEGMENT->large_overflow_objects);
+
+        s_mutex_unlock();
+        return;
+    }
+
 #ifndef NDEBUG
     pr->running_pthread = pthread_self();
 #endif
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -52,10 +52,14 @@
 
 static stm_char *allocate_outside_nursery_large(uint64_t size)
 {
-    /* Allocate the object with largemalloc.c from the lower addresses. */
-    char *addr = _stm_large_malloc(size);
+    /* Allocate the object with largemalloc.c from the lower
+       addresses.  Round up the size to a multiple of 16, rather than
+       8, as a quick way to simplify the code in stm_write_card().
+    */
+    char *addr = _stm_large_malloc((size + 15) & ~15);
     if (addr == NULL)
         stm_fatalerror("not enough memory!");
+    assert((((uintptr_t)addr) & 15) == 0);    /* alignment check */
 
     if (LIKELY(addr + size <= uninitialized_page_start)) {
         dprintf(("allocate_outside_nursery_large(%lu): %p, page=%lu\n",
@@ -200,11 +204,16 @@
 
 /************************************************************/
 
+static bool is_overflow_obj_safe(struct stm_priv_segment_info_s *pseg, object_t *obj)
+{
+    /* like IS_OVERFLOW_OBJ, but first checks that the page is
+       accessible, so as not to cause segfaults during major gc */
+    if (get_page_status_in(pseg->pub.segment_num, (uintptr_t)obj / 4096UL) == PAGE_NO_ACCESS)
+        return false;
 
-static bool is_new_object(object_t *obj)
-{
-    struct object_s *realobj = (struct object_s*)REAL_ADDRESS(stm_object_pages, obj); /* seg0 */
-    return realobj->stm_flags & GCFLAG_WB_EXECUTED;
+    struct object_s *realobj = (struct object_s*)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    return IS_OVERFLOW_OBJ(pseg, realobj);
 }
 
 
@@ -230,7 +239,10 @@
 }
 
 
-static void mark_and_trace(object_t *obj, char *segment_base)
+static void mark_and_trace(
+    object_t *obj,
+    char *segment_base, /* to trace obj in */
+    struct stm_priv_segment_info_s *pseg) /* to trace children in */
 {
     /* mark the obj and trace all reachable objs from it */
 
@@ -242,36 +254,40 @@
     stmcb_trace(realobj, &mark_record_trace);
 
     /* trace all references found in sharing seg0 (should always be
-       up-to-date and not cause segfaults, except for new objs) */
+       up-to-date and not cause segfaults, except for overflow objs) */
+    segment_base = pseg->pub.segment_base;
     while (!list_is_empty(marked_objects_to_trace)) {
         obj = (object_t *)list_pop_item(marked_objects_to_trace);
 
-        char *base = is_new_object(obj) ? segment_base : stm_object_pages;
+        char *base = is_overflow_obj_safe(pseg, obj) ? segment_base : stm_object_pages;
         realobj = (struct object_s *)REAL_ADDRESS(base, obj);
         stmcb_trace(realobj, &mark_record_trace);
     }
 }
 
-static inline void mark_visit_object(object_t *obj, char *segment_base)
+static inline void mark_visit_object(
+    object_t *obj,
+    char *segment_base, /* to trace obj in */
+    struct stm_priv_segment_info_s *pseg) /* to trace children in */
 {
     /* if already visited, don't trace */
     if (obj == NULL || mark_visited_test_and_set(obj))
         return;
-    mark_and_trace(obj, segment_base);
+    mark_and_trace(obj, segment_base, pseg);
 }
 
 
-static void mark_visit_possibly_new_object(char *segment_base, object_t *obj)
+static void mark_visit_possibly_new_object(object_t *obj, struct stm_priv_segment_info_s *pseg)
 {
     /* overflow objs of this pseg are traced in their own segment,
        all other objects in the sharing seg0 */
     if (obj == NULL)
         return;
 
-    if (is_new_object(obj)) {
-        mark_visit_object(obj, segment_base);
+    if (is_overflow_obj_safe(pseg, obj)) {
+        mark_visit_object(obj, pseg->pub.segment_base, pseg);
     } else {
-        mark_visit_object(obj, stm_object_pages);
+        mark_visit_object(obj, stm_object_pages, pseg);
     }
 }
 
@@ -282,8 +298,10 @@
     end = (const struct stm_shadowentry_s *)(slice + size);
     for (; p < end; p++)
         if ((((uintptr_t)p->ss) & 3) == 0) {
-            assert(!is_new_object(p->ss));
-            mark_visit_object(p->ss, stm_object_pages); // seg0
+            mark_visit_object(p->ss, stm_object_pages, // seg0
+                              /* there should be no overflow objs not already
+                                 visited, so any pseg is fine really: */
+                              get_priv_segment(STM_SEGMENT->segment_num));
         }
     return NULL;
 }
@@ -350,7 +368,7 @@
                   and thus make all pages accessible. */
                assert_obj_accessible_in(i, item);
 
-               assert(!is_new_object(item)); /* should never be in that list */
+               assert(!is_overflow_obj_safe(get_priv_segment(i), item)); /* should never be in that list */
 
                if (!mark_visited_test_and_set(item)) {
                    /* trace shared, committed version: only do this if we didn't
@@ -358,9 +376,9 @@
                       objs before mark_visit_from_modified_objects AND if we
                       do mark_and_trace on an obj that is modified in >1 segment,
                       the tracing always happens in seg0 (see mark_and_trace). */
-                   mark_and_trace(item, stm_object_pages);
+                   mark_and_trace(item, stm_object_pages, get_priv_segment(i));
                }
-               mark_and_trace(item, base);   /* private, modified version */
+               mark_and_trace(item, base, get_priv_segment(i));   /* private, modified version */
            }));
 
         list_clear(uniques);
@@ -372,7 +390,11 @@
 {
     if (testing_prebuilt_objs != NULL) {
         LIST_FOREACH_R(testing_prebuilt_objs, object_t * /*item*/,
-                       mark_visit_object(item, stm_object_pages)); // seg0
+                   mark_visit_object(item, stm_object_pages, // seg0
+                                     /* any pseg is fine, as we already traced modified
+                                        objs and thus covered all overflow objs reachable
+                                        from here */
+                                     get_priv_segment(STM_SEGMENT->segment_num)));
     }
 
     stm_thread_local_t *tl = stm_all_thread_locals;
@@ -380,7 +402,7 @@
         /* look at all objs on the shadow stack (they are old but may
            be uncommitted so far, so only exist in the associated_segment_num).
 
-           IF they are uncommitted new objs, trace in the actual segment,
+           IF they are uncommitted overflow objs, trace in the actual segment,
            otherwise, since we just executed a minor collection, they were
            all synced to the sharing seg0. Thus we can trace them there.
 
@@ -392,17 +414,17 @@
            If 'tl' is currently running, its 'last_associated_segment_num'
            field is the segment number that contains the correct
            version of its overflowed objects. */
-        char *segment_base = get_segment_base(tl->last_associated_segment_num);
+        struct stm_priv_segment_info_s *pseg = get_priv_segment(tl->last_associated_segment_num);
 
         struct stm_shadowentry_s *current = tl->shadowstack;
         struct stm_shadowentry_s *base = tl->shadowstack_base;
         while (current-- != base) {
             if ((((uintptr_t)current->ss) & 3) == 0) {
-                mark_visit_possibly_new_object(segment_base, current->ss);
+                mark_visit_possibly_new_object(current->ss, pseg);
             }
         }
 
-        mark_visit_possibly_new_object(segment_base, tl->thread_local_obj);
+        mark_visit_possibly_new_object(tl->thread_local_obj, pseg);
 
         tl = tl->next;
     } while (tl != stm_all_thread_locals);
@@ -413,8 +435,8 @@
     for (i = 1; i < NB_SEGMENTS; i++) {
         if (get_priv_segment(i)->transaction_state != TS_NONE) {
             mark_visit_possibly_new_object(
-                get_segment_base(i),
-                get_priv_segment(i)->threadlocal_at_start_of_transaction);
+                get_priv_segment(i)->threadlocal_at_start_of_transaction,
+                get_priv_segment(i));
 
             stm_rewind_jmp_enum_shadowstack(
                 get_segment(i)->running_thread,
@@ -423,49 +445,6 @@
     }
 }
 
-static void ready_new_objects(void)
-{
-#pragma push_macro("STM_PSEGMENT")
-#pragma push_macro("STM_SEGMENT")
-#undef STM_PSEGMENT
-#undef STM_SEGMENT
-    /* objs in new_objects only have garbage in the sharing seg0,
-       since it is used to mark objs as visited, we must make
-       sure the flag is cleared at the start of a major collection.
-       (XXX: ^^^ may be optional if we have the part below)
-
-       Also, we need to be able to recognize these objects in order
-       to only trace them in the segment they are valid in. So we
-       also make sure to set WB_EXECUTED in the sharing seg0. No
-       other objs than new_objects have WB_EXECUTED in seg0 (since
-       there can only be committed versions there).
-    */
-
-    long i;
-    for (i = 1; i < NB_SEGMENTS; i++) {
-        struct stm_priv_segment_info_s *pseg = get_priv_segment(i);
-        struct list_s *lst = pseg->new_objects;
-
-        LIST_FOREACH_R(lst, object_t* /*item*/,
-            ({
-                struct object_s *realobj;
-                /* WB_EXECUTED always set in this segment */
-                assert(realobj = (struct object_s*)REAL_ADDRESS(pseg->pub.segment_base, item));
-                assert(realobj->stm_flags & GCFLAG_WB_EXECUTED);
-
-                /* clear VISITED (garbage) and ensure WB_EXECUTED in seg0 */
-                mark_visited_test_and_clear(item);
-                realobj = (struct object_s*)REAL_ADDRESS(stm_object_pages, item);
-                realobj->stm_flags |= GCFLAG_WB_EXECUTED;
-
-                /* make sure this flag is cleared as well */
-                realobj->stm_flags &= ~GCFLAG_FINALIZATION_ORDERING;
-            }));
-    }
-#pragma pop_macro("STM_SEGMENT")
-#pragma pop_macro("STM_PSEGMENT")
-}
-
 
 static void clean_up_segment_lists(void)
 {
@@ -494,11 +473,10 @@
                 ({
                     struct object_s *realobj = (struct object_s *)
                         REAL_ADDRESS(pseg->pub.segment_base, (uintptr_t)item);
+                    assert(!(realobj->stm_flags & GCFLAG_WRITE_BARRIER));
+                    realobj->stm_flags |= GCFLAG_WRITE_BARRIER;
 
-                    assert(realobj->stm_flags & GCFLAG_WB_EXECUTED);
-                    assert(!(realobj->stm_flags & GCFLAG_WRITE_BARRIER));
-
-                    realobj->stm_flags |= GCFLAG_WRITE_BARRIER;
+                    OPT_ASSERT(!(realobj->stm_flags & GCFLAG_CARDS_SET));
                 }));
             list_clear(lst);
         } else {
@@ -507,12 +485,31 @@
                modified_old_objs. */
         }
 
-        /* remove from new_objects all objects that die */
-        lst = pseg->new_objects;
+        lst = pseg->old_objects_with_cards_set;
+        LIST_FOREACH_R(lst, object_t* /*item*/,
+            ({
+                struct object_s *realobj = (struct object_s *)
+                    REAL_ADDRESS(pseg->pub.segment_base, item);
+                OPT_ASSERT(realobj->stm_flags & GCFLAG_WRITE_BARRIER);
+
+                /* if the obj survives, mark its marked cards as old;
+                   otherwise CLEAR them, as their spot could get reused */
+                uint8_t mark_value = mark_visited_test(item) ?
+                    pseg->pub.transaction_read_version : CARD_CLEAR;
+                _reset_object_cards(pseg, item, mark_value, false,
+                                    mark_value == CARD_CLEAR);
+            }));
+        list_clear(lst);
+
+
+        /* remove from large_overflow_objects all objects that die */
+        lst = pseg->large_overflow_objects;
         uintptr_t n = list_count(lst);
         while (n-- > 0) {
             object_t *obj = (object_t *)list_item(lst, n);
             if (!mark_visited_test(obj)) {
+                if (obj_should_use_cards(pseg->pub.segment_base, obj))
+                    _reset_object_cards(pseg, obj, CARD_CLEAR, false, true);
                 list_set_item(lst, n, list_pop_item(lst));
             }
         }
@@ -683,8 +680,6 @@
 
     DEBUG_EXPECT_SEGFAULT(false);
 
-    ready_new_objects();
-
     /* marking */
     LIST_CREATE(marked_objects_to_trace);
     mark_visit_from_modified_objects();
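
allocate_outside_nursery_large() now rounds every size up to a
multiple of 16 with (size + 15) & ~15 and asserts that the returned
address is 16-byte aligned; this is the alignment that
_stm_write_slowpath_card() relies on when it computes the card
address as ((uintptr_t)obj) >> 4.  A sketch of the rounding idiom
(round_up_16 is a name used only for this note):

    #include <assert.h>
    #include <stdint.h>

    /* round up to the next multiple of 16, as in
       allocate_outside_nursery_large() */
    static uint64_t round_up_16(uint64_t size)
    {
        return (size + 15) & ~(uint64_t)15;
    }

    int main(void)
    {
        assert(round_up_16(1)  == 16);
        assert(round_up_16(16) == 16);          /* multiples unchanged */
        assert(round_up_16(17) == 32);
        assert((round_up_16(4095) & 15) == 0);  /* always aligned */
        return 0;
    }
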
diff --git a/c8/stm/gcpage.h b/c8/stm/gcpage.h
--- a/c8/stm/gcpage.h
+++ b/c8/stm/gcpage.h
@@ -7,7 +7,6 @@
 #define GC_MIN                 (NB_NURSERY_PAGES * 4096 * 8)
 #define GC_MAJOR_COLLECT       1.82
 
-
 static struct list_s *testing_prebuilt_objs;
 static char *uninitialized_page_start;   /* within segment 0 */
 static char *uninitialized_page_stop;
diff --git a/c8/stm/misc.c b/c8/stm/misc.c
--- a/c8/stm/misc.c
+++ b/c8/stm/misc.c
@@ -42,6 +42,11 @@
     return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0;
 }
 
+bool _stm_was_written_card(object_t *obj)
+{
+    return obj->stm_flags & _STM_GCFLAG_CARDS_SET;
+}
+
 long _stm_count_cl_entries()
 {
     struct stm_commit_log_entry_s *cl = &commit_log_root;
@@ -80,6 +85,13 @@
     return list_count(STM_PSEGMENT->objects_pointing_to_nursery);
 }
 
+long _stm_count_old_objects_with_cards_set(void)
+{
+    if (STM_PSEGMENT->old_objects_with_cards_set == NULL)
+        return -1;
+    return list_count(STM_PSEGMENT->old_objects_with_cards_set);
+}
+
 object_t *_stm_enum_modified_old_objects(long index)
 {
     return (object_t *)list_item(
@@ -92,6 +104,27 @@
         STM_PSEGMENT->objects_pointing_to_nursery, index);
 }
 
+object_t *_stm_enum_old_objects_with_cards_set(long index)
+{
+    return (object_t *)list_item(
+        STM_PSEGMENT->old_objects_with_cards_set, index);
+}
+
+
+uint8_t _stm_get_card_value(object_t *obj, long idx)
+{
+    struct stm_read_marker_s *cards = get_read_marker(STM_SEGMENT->segment_base,
+                                                      (uintptr_t)obj);
+    return cards[get_index_to_card_index(idx)].rm;
+}
+
+uint8_t _stm_get_transaction_read_version()
+{
+    return STM_SEGMENT->transaction_read_version;
+}
+
+
+
 static struct stm_commit_log_entry_s *_last_cl_entry;
 static long _last_cl_entry_index;
 void _stm_start_enum_last_cl_entry()
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -38,7 +38,7 @@
 }
 
 static inline bool _is_from_same_transaction(object_t *obj) {
-    return _is_young(obj) || (obj->stm_flags & GCFLAG_WB_EXECUTED);
+    return _is_young(obj) || IS_OVERFLOW_OBJ(STM_PSEGMENT, obj);
 }
 
 long stm_can_move(object_t *obj)
@@ -132,18 +132,166 @@
         nobj_sync_now = ((uintptr_t)nobj) | FLAG_SYNC_LARGE;
     }
 
-    /* if this is not during commit, we will add them to the new_objects
-       list and push them to other segments on commit. Thus we can add
-       the WB_EXECUTED flag so that they don't end up in modified_old_objects */
+    /* if this is not during commit, we make them overflow objects
+       and push them to other segments on commit. */
     assert(!(nobj->stm_flags & GCFLAG_WB_EXECUTED));
+    assert((nobj->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) == 0);
     if (!STM_PSEGMENT->minor_collect_will_commit_now) {
-        nobj->stm_flags |= GCFLAG_WB_EXECUTED;
+        nobj->stm_flags |= STM_PSEGMENT->overflow_number;
     }
 
     /* Must trace the object later */
     LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, nobj_sync_now);
+    _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), nobj, true);
 }
 
+static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj,
+                                     bool strict) /* strict = MARKED_OLD not allowed */
+{
+#ifndef NDEBUG
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+
+    if (size < _STM_MIN_CARD_OBJ_SIZE)
+        return;                 /* too small for cards */
+
+    assert(!(realobj->stm_flags & GCFLAG_CARDS_SET));
+
+    if (!stmcb_obj_supports_cards(realobj))
+        return;
+
+    uintptr_t offset_itemsize[2] = {0, 0};
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+    struct stm_read_marker_s *cards = get_read_marker(pseg->pub.segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    size_t real_idx_count = (size - offset_itemsize[0]) / offset_itemsize[1];
+    uintptr_t last_card_index = get_index_to_card_index(real_idx_count - 1); /* max valid index */
+
+    while (card_index <= last_card_index) {
+        assert(cards[card_index].rm == CARD_CLEAR
+               || (cards[card_index].rm != CARD_MARKED
+                   && cards[card_index].rm < pseg->pub.transaction_read_version)
+               || (!strict && cards[card_index].rm != CARD_MARKED));
+        card_index++;
+    }
+#endif
+}
+
+static void _verify_cards_cleared_in_all_lists(struct stm_priv_segment_info_s *pseg)
+{
+#ifndef NDEBUG
+    struct list_s *list = pseg->modified_old_objects;
+    struct stm_undo_s *undo = (struct stm_undo_s *)list->items;
+    struct stm_undo_s *end = (struct stm_undo_s *)(list->items + list->count);
+
+    for (; undo < end; undo++) {
+        _cards_cleared_in_object(pseg, undo->object, false);
+    }
+    LIST_FOREACH_R(
+        pseg->large_overflow_objects, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item, false));
+    LIST_FOREACH_R(
+        pseg->objects_pointing_to_nursery, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item, false));
+    LIST_FOREACH_R(
+        pseg->old_objects_with_cards_set, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item, false));
+#endif
+}
+
+static void _reset_object_cards(struct stm_priv_segment_info_s *pseg,
+                                object_t *obj, uint8_t mark_value,
+                                bool mark_all, bool really_clear)
+{
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
+    dprintf(("_reset_object_cards(%p, mark=%d, mark_all=%d, really_clear=%d)\n",
+             obj, mark_value, mark_all, really_clear));
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    OPT_ASSERT(size >= _STM_MIN_CARD_OBJ_SIZE);
+
+    uintptr_t offset_itemsize[2];
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+    size = (size - offset_itemsize[0]) / offset_itemsize[1];
+
+    /* really_clear is only used for new objs freed during minor
+       collections: their cards must be cleared even if they are
+       already set to transaction_read_version */
+    assert(IMPLY(really_clear, mark_value == CARD_CLEAR && !mark_all));
+    assert(IMPLY(mark_value == CARD_CLEAR, !mark_all)); /* not necessary */
+    assert(IMPLY(mark_all,
+                 mark_value == pseg->pub.transaction_read_version)); /* set *all* to OLD */
+
+    struct stm_read_marker_s *cards = get_read_marker(pseg->pub.segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    /* dprintf(("mark cards of %p, size %lu with %d, all: %d\n",
+                obj, size, mark_value, mark_all));
+       dprintf(("obj has %lu cards\n", last_card_index));*/
+    while (card_index <= last_card_index) {
+        if (mark_all || cards[card_index].rm == CARD_MARKED
+            || (really_clear && cards[card_index].rm != CARD_CLEAR)) {
+            /* dprintf(("mark card %lu,wl:%lu of %p with %d\n", */
+            /*          card_index, card_lock_idx, obj, mark_value)); */
+            cards[card_index].rm = mark_value;
+        }
+        card_index++;
+    }
+
+    realobj->stm_flags &= ~GCFLAG_CARDS_SET;
+
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
+}
+
+
+static void _trace_card_object(object_t *obj)
+{
+    assert(!_is_in_nursery(obj));
+    assert(obj->stm_flags & GCFLAG_CARDS_SET);
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+
+    dprintf(("_trace_card_object(%p)\n", obj));
+
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    uintptr_t offset_itemsize[2];
+    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+    size = (size - offset_itemsize[0]) / offset_itemsize[1];
+
+    struct stm_read_marker_s *cards = get_read_marker(STM_SEGMENT->segment_base, (uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    /* XXX: merge ranges */
+    while (card_index <= last_card_index) {
+        if (cards[card_index].rm == CARD_MARKED) {
+            /* clear or set to old: */
+            cards[card_index].rm = STM_SEGMENT->transaction_read_version;
+
+            uintptr_t start = get_card_index_to_index(card_index);
+            uintptr_t stop = get_card_index_to_index(card_index + 1);
+            if (card_index == last_card_index) {
+                assert(stop >= size);
+                stop = size;
+            }
+            else {
+                assert(stop < size);
+            }
+
+            dprintf(("trace_cards on %p with start:%lu stop:%lu\n",
+                     obj, start, stop));
+            stmcb_trace_cards(realobj, &minor_trace_if_young,
+                              start, stop);
+        }
+
+        card_index++;
+    }
+    obj->stm_flags &= ~GCFLAG_CARDS_SET;
+}
 
 static void collect_roots_in_nursery(void)
 {
@@ -177,15 +325,20 @@
 static inline void _collect_now(object_t *obj)
 {
     assert(!_is_young(obj));
+    assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
 
     //dprintf(("_collect_now: %p\n", obj));
 
-    assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER));
+    if (!(obj->stm_flags & GCFLAG_WRITE_BARRIER)) {
+        /* Trace the 'obj' to replace pointers to nursery with pointers
+           outside the nursery, possibly forcing nursery objects out and
+           adding them to 'objects_pointing_to_nursery' as well. */
+        char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+        stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
 
-    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
-    stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
-
-    obj->stm_flags |= GCFLAG_WRITE_BARRIER;
+        obj->stm_flags |= GCFLAG_WRITE_BARRIER;
+    }
+    /* else traced in collect_cardrefs_to_nursery if necessary */
 }
 
 
@@ -201,25 +354,29 @@
         assert(!_is_in_nursery(obj));
 
         _collect_now(obj);
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
 
         if (obj_sync_now & FLAG_SYNC_LARGE) {
+            /* XXX: SYNC_LARGE is currently set even for small objs */
             /* this is a newly allocated obj in this transaction. We must
                either synchronize the object to other segments now, or
-               add the object to new_objects list */
-            if (STM_PSEGMENT->minor_collect_will_commit_now) {
-                acquire_privatization_lock(STM_SEGMENT->segment_num);
+               add the object to large_overflow_objects list */
+            struct stm_priv_segment_info_s *pseg = get_priv_segment(STM_SEGMENT->segment_num);
+            if (pseg->minor_collect_will_commit_now) {
+                acquire_privatization_lock(pseg->pub.segment_num);
                 synchronize_object_enqueue(obj);
-                release_privatization_lock(STM_SEGMENT->segment_num);
+                release_privatization_lock(pseg->pub.segment_num);
             } else {
-                LIST_APPEND(STM_PSEGMENT->new_objects, obj);
+                LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
             }
+            _cards_cleared_in_object(pseg, obj, false);
         }
 
         /* the list could have moved while appending */
         lst = STM_PSEGMENT->objects_pointing_to_nursery;
     }
 
-    /* flush all new objects to other segments now */
+    /* flush all overflow objects to other segments now */
     if (STM_PSEGMENT->minor_collect_will_commit_now) {
         acquire_privatization_lock(STM_SEGMENT->segment_num);
         synchronize_objects_flush();
@@ -230,6 +387,30 @@
     }
 }
 
+
+static void collect_cardrefs_to_nursery(void)
+{
+    dprintf(("collect_cardrefs_to_nursery\n"));
+    struct list_s *lst = STM_PSEGMENT->old_objects_with_cards_set;
+
+    while (!list_is_empty(lst)) {
+        object_t *obj = (object_t*)list_pop_item(lst);
+
+        assert(!_is_young(obj));
+
+        if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+            /* sometimes we remove the CARDS_SET in the WB slowpath, see core.c */
+            continue;
+        }
+
+        /* traces cards, clears marked cards or marks them old if necessary */
+        _trace_card_object(obj);
+
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+    }
+}
+
+
 static void collect_objs_still_young_but_with_finalizers(void)
 {
     struct list_s *lst = STM_PSEGMENT->finalizers->objects_with_finalizers;
@@ -313,6 +494,13 @@
     dprintf(("minor_collection commit=%d\n", (int)commit));
 
     STM_PSEGMENT->minor_collect_will_commit_now = commit;
+    if (!commit) {
+        /* 'STM_PSEGMENT->overflow_number' is used now by this collection,
+           in the sense that it's copied to the overflow objects */
+        STM_PSEGMENT->overflow_number_has_been_used = true;
+    }
+
+    collect_cardrefs_to_nursery();
 
     collect_roots_in_nursery();
 
@@ -320,6 +508,7 @@
         collect_objs_still_young_but_with_finalizers();
 
     collect_oldrefs_to_nursery();
+    assert(list_is_empty(STM_PSEGMENT->old_objects_with_cards_set));
 
     /* now all surviving nursery objects have been moved out */
     acquire_privatization_lock(STM_SEGMENT->segment_num);
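The helpers get_index_to_card_index() and get_card_index_to_index()
used by the loops above are defined elsewhere in c8/stm and do not
appear in this truncated diff.  A minimal sketch of what they are
assumed to do, consistent with those loops and with
_STM_CARD_BITS == 5 (see the stmgc.h hunk below):

    /* sketch only -- inferred from the surrounding code, not part
       of this changeset */
    static inline uintptr_t get_index_to_card_index(uintptr_t index)
    {
        return (index >> _STM_CARD_BITS) + 1;
    }
    static inline uintptr_t get_card_index_to_index(uintptr_t card_index)
    {
        return (card_index - 1) << _STM_CARD_BITS;
    }

With 32 indices per card, item index 100 maps to card
(100 >> 5) + 1 == 4, and card 4 covers the range [96, 128[ -- exactly
the [start, stop[ interval that _trace_card_object() passes to
stmcb_trace_cards().  Card index 0 is skipped by all the loops above:
it coincides with the object's ordinary read marker byte.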
diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h
--- a/c8/stm/nursery.h
+++ b/c8/stm/nursery.h
@@ -2,6 +2,14 @@
 #define NSE_SIGPAUSE   _STM_NSE_SIGNAL_MAX
 #define NSE_SIGABORT   _STM_NSE_SIGNAL_ABORT
 
+static uint32_t highest_overflow_number;
+
+static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj,
+                                     bool strict);
+static void _reset_object_cards(struct stm_priv_segment_info_s *pseg,
+                                object_t *obj, uint8_t mark_value,
+                                bool mark_all, bool really_clear);
+
 static void minor_collection(bool commit);
 static void check_nursery_at_transaction_start(void);
 static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg);
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -100,10 +100,11 @@
         pr->pub.segment_num = i;
         pr->pub.segment_base = segment_base;
         pr->modified_old_objects = list_create();
-        pr->new_objects = list_create();
+        pr->large_overflow_objects = list_create();
         pr->young_weakrefs = list_create();
         pr->old_weakrefs = list_create();
         pr->objects_pointing_to_nursery = list_create();
+        pr->old_objects_with_cards_set = list_create();
         pr->young_outside_nursery = tree_create();
         pr->nursery_objects_shadows = tree_create();
         pr->callbacks_on_commit_and_abort[0] = tree_create();
@@ -112,6 +113,8 @@
         pr->old_objects_with_light_finalizers = list_create();
 
         pr->last_commit_log_entry = &commit_log_root;
+        pr->overflow_number = GCFLAG_OVERFLOW_NUMBER_bit0 * i;
+        highest_overflow_number = pr->overflow_number;
         pr->pub.transaction_read_version = 0xff;
     }
 
@@ -147,9 +150,10 @@
         struct stm_priv_segment_info_s *pr = get_priv_segment(i);
         assert(list_is_empty(pr->objects_pointing_to_nursery));
         list_free(pr->objects_pointing_to_nursery);
+        list_free(pr->old_objects_with_cards_set);
         list_free(pr->modified_old_objects);
-        assert(list_is_empty(pr->new_objects));
-        list_free(pr->new_objects);
+        assert(list_is_empty(pr->large_overflow_objects));
+        list_free(pr->large_overflow_objects);
         list_free(pr->young_weakrefs);
         list_free(pr->old_weakrefs);
         tree_free(pr->young_outside_nursery);
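The IS_OVERFLOW_OBJ() test used by nursery.c above is defined in
c8/stm/core.h, which this truncated diff omits.  Judging from the
assert on "stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0" above, it
presumably masks off the regular GC flags and compares the remaining
bits with the segment's current overflow_number; a sketch under that
assumption:

    /* sketch, inferred from the asserts in nursery.c -- not part
       of this changeset */
    #define IS_OVERFLOW_OBJ(pseg, obj)                            \
        (((obj)->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) ==     \
         (pseg)->overflow_number)

In two's complement, -GCFLAG_OVERFLOW_NUMBER_bit0 sets every bit from
GCFLAG_OVERFLOW_NUMBER_bit0 upward, so the mask extracts the overflow
number while ignoring the ordinary GC flags below it; initializing
each segment with GCFLAG_OVERFLOW_NUMBER_bit0 * i keeps the segments'
numbers distinct.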
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -77,7 +77,15 @@
 #define _STM_NSE_SIGNAL_ABORT             1
 #define _STM_NSE_SIGNAL_MAX               2
 
+#define _STM_CARD_MARKED 1      /* should always be 1... */
+#define _STM_GCFLAG_CARDS_SET          0x8
+#define _STM_CARD_BITS                 5   /* must be 5/6/7 for the pypy jit */
+#define _STM_CARD_SIZE                 (1 << _STM_CARD_BITS)
+#define _STM_MIN_CARD_COUNT            17
+#define _STM_MIN_CARD_OBJ_SIZE         (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
+
 void _stm_write_slowpath(object_t *);
+void _stm_write_slowpath_card(object_t *, uintptr_t);
 object_t *_stm_allocate_slowpath(ssize_t);
 object_t *_stm_allocate_external(ssize_t);
 void _stm_become_inevitable(const char*);
@@ -87,9 +95,11 @@
 char *_stm_real_address(object_t *o);
 #ifdef STM_TESTS
 #include <stdbool.h>
+uint8_t _stm_get_transaction_read_version();
+uint8_t _stm_get_card_value(object_t *obj, long idx);
 bool _stm_was_read(object_t *obj);
 bool _stm_was_written(object_t *obj);
-
+bool _stm_was_written_card(object_t *obj);
 bool _stm_is_accessible_page(uintptr_t pagenum);
 
 void _stm_test_switch(stm_thread_local_t *tl);
@@ -125,7 +135,8 @@
 object_t *_stm_next_last_cl_entry();
 void _stm_start_enum_last_cl_entry();
 long _stm_count_cl_entries();
-
+long _stm_count_old_objects_with_cards_set(void);
+object_t *_stm_enum_old_objects_with_cards_set(long index);
 uint64_t _stm_total_allocated(void);
 #endif
 
@@ -156,6 +167,22 @@
 
 extern ssize_t stmcb_size_rounded_up(struct object_s *);
 void stmcb_trace(struct object_s *obj, void visit(object_t **));
+/* a special trace-callback that is only called for the ranges of
+   indices that were marked with stm_write_card(o, index) */
+extern void stmcb_trace_cards(struct object_s *, void (object_t **),
+                              uintptr_t start, uintptr_t stop);
+/* called on objects that support cards.  Fills offset_itemsize[0]
+   with the base offset (in bytes) inside the object at which the
+   indices start, and offset_itemsize[1] with the size (in bytes)
+   of one item */
+extern void stmcb_get_card_base_itemsize(struct object_s *,
+                                         uintptr_t offset_itemsize[2]);
+/* returns whether this object supports cards.  We will only call
+   stmcb_get_card_base_itemsize on objs that do. */
+extern long stmcb_obj_supports_cards(struct object_s *);
+
+
+
 
 __attribute__((always_inline))
 static inline void stm_read(object_t *obj)
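For library users, the three new callbacks fit together as follows.
A sketch for a hypothetical array-of-pointers type (the struct and
its fields are illustrative only, not part of stmgc; the test
implementations further down in c8/test/support.py play the same
role):

    #include <stddef.h>               /* offsetof() */

    struct mylist_s {                 /* hypothetical user type */
        struct object_s hdr;          /* stm object header */
        uintptr_t n_items;
        object_t *items[1];           /* really n_items entries */
    };

    long stmcb_obj_supports_cards(struct object_s *obj)
    {
        return 1;                     /* all mylist_s arrays do */
    }

    void stmcb_get_card_base_itemsize(struct object_s *obj,
                                      uintptr_t offset_itemsize[2])
    {
        offset_itemsize[0] = offsetof(struct mylist_s, items);
        offset_itemsize[1] = sizeof(object_t *);
    }

    void stmcb_trace_cards(struct object_s *obj, void visit(object_t **),
                           uintptr_t start, uintptr_t stop)
    {
        struct mylist_s *lst = (struct mylist_s *)obj;
        uintptr_t i;
        for (i = start; i < stop && i < lst->n_items; i++)
            visit(&lst->items[i]);    /* only indices in [start, stop[ */
    }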
@@ -173,6 +200,45 @@
 
 
 __attribute__((always_inline))
+static inline void stm_write_card(object_t *obj, uintptr_t index)
+{
+    /* if GCFLAG_WRITE_BARRIER is not set, then don't do anything more. */
+    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)) {
+
+        /* GCFLAG_WRITE_BARRIER is set.  This might be because it's
+           the first time we see a given small array; or it might be
+           because it's a big array with card marking.  In the latter
+           case we will always reach this point, even if we already
+           marked the correct card.  Since that is actually the most
+           common case, check it here.  If the array doesn't actually
+           use card marking, the following read is a bit nonsensical,
+           but in a way that should never return CARD_MARKED by
+           mistake.
+
+           The computation of the card marker is further optimized by
+           assuming that large objects are allocated to multiples of
+           16 (rather than just 8, as all objects are).  Under this
+           assumption the following code is equivalent to:
+
+               (obj >> 4) + (index / _STM_CARD_SIZE) + 1
+
+           The code below however takes only a couple of assembler
+           instructions.  It also assumes that the intermediate value
+           fits in a 64-bit value, which it clearly does (all values
+           are much smaller than 2 ** 60).
+        */
+        uintptr_t v = (((uintptr_t)obj) << (_STM_CARD_BITS - 4)) + index;
+        stm_read_marker_t *card1 = (stm_read_marker_t *)(v >> _STM_CARD_BITS);
+        if (card1[1].rm != _STM_CARD_MARKED) {
+
+            /* slow path. */
+            _stm_write_slowpath_card(obj, index);
+        }
+    }
+}
+
+
+__attribute__((always_inline))
 static inline object_t *stm_allocate(ssize_t size_rounded_up)
 {
     OPT_ASSERT(size_rounded_up >= 16);
@@ -327,14 +393,8 @@
 
 
 /* dummies for now: */
-__attribute__((always_inline))
-static inline void stm_write_card(object_t *obj, uintptr_t index)
-{
-    stm_write(obj);
-}
+static inline void stm_flush_timing(stm_thread_local_t *tl, int verbose) {}
 
-
-static inline void stm_flush_timing(stm_thread_local_t *tl, int verbose) {}
 /* ==================== END ==================== */
 
 static void (*stmcb_expand_marker)(char *segment_base, uintptr_t odd_number,
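To check the claimed equivalence in stm_write_card() by hand, assume
(as the code above does) that the card markers of 'obj' start at
segment offset obj >> 4, and take _STM_CARD_BITS == 5 plus a
hypothetical object address that is a multiple of 16:

    obj   = 0x10000, index = 100
    v     = (obj << (5 - 4)) + index   = 0x20000 + 100 = 131172
    v >> 5                             = 4099
    (obj >> 4) + index/_STM_CARD_SIZE  = 4096 + 3      = 4099

so card1[1] is the byte at offset
(obj >> 4) + index/_STM_CARD_SIZE + 1, which is the commented
formula.  No carry is lost in the shift: obj << (_STM_CARD_BITS - 4)
is a multiple of _STM_CARD_SIZE whenever obj is a multiple of 16.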
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -10,6 +10,8 @@
 #define STM_NB_SEGMENTS ...
 #define _STM_GCFLAG_WRITE_BARRIER ...
 #define _STM_FAST_ALLOC ...
+#define _STM_CARD_SIZE ...
+#define _STM_CARD_MARKED ...
 
 typedef struct {
 ...;
@@ -43,6 +45,11 @@
 object_t *stm_allocate_weakref(ssize_t size_rounded_up);
 object_t *stm_allocate_with_finalizer(ssize_t size_rounded_up);
 
+/*void stm_write_card(); use _checked_stm_write_card() instead */
+
+uint8_t _stm_get_card_value(object_t *obj, long idx);
+uint8_t _stm_get_transaction_read_version();
+
 void stm_setup(void);
 void stm_teardown(void);
 void stm_register_thread_local(stm_thread_local_t *tl);
@@ -59,8 +66,10 @@
 ssize_t stmcb_size_rounded_up(struct object_s *obj);
 
 bool _checked_stm_write(object_t *obj);
+bool _checked_stm_write_card(object_t *obj, uintptr_t index);
 bool _stm_was_read(object_t *obj);
 bool _stm_was_written(object_t *obj);
+bool _stm_was_written_card(object_t *obj);
 char *_stm_get_segment_base(long index);
 bool _stm_in_transaction(stm_thread_local_t *tl);
 int _stm_get_flags(object_t *obj);
@@ -118,8 +127,10 @@
 
 long _stm_count_modified_old_objects(void);
 long _stm_count_objects_pointing_to_nursery(void);
+long _stm_count_old_objects_with_cards_set(void);
 object_t *_stm_enum_modified_old_objects(long index);
 object_t *_stm_enum_objects_pointing_to_nursery(long index);
+object_t *_stm_enum_old_objects_with_cards_set(long index);
 object_t *_stm_next_last_cl_entry();
 void _stm_start_enum_last_cl_entry();
 long _stm_count_cl_entries();
@@ -191,6 +202,10 @@
     CHECKED(stm_write(object));
 }
 
+bool _checked_stm_write_card(object_t *object, uintptr_t index) {
+    CHECKED(stm_write_card(object, index));
+}
+
 bool _check_commit_transaction(void) {
     CHECKED(stm_commit_transaction());
 }
@@ -322,6 +337,43 @@
     }
 }
 
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+    return 1;
+}
+
+void stmcb_trace_cards(struct object_s *obj, void visit(object_t **),
+                       uintptr_t start, uintptr_t stop)
+{
+    uintptr_t i;
+    struct myobj_s *myobj = (struct myobj_s*)obj;
+    assert(myobj->type_id != 421419);
+    assert(myobj->type_id != 421418);
+    if (myobj->type_id < 421420) {
+        /* basic case: no references */
+        return;
+    }
+
+    for (i=start; (i < myobj->type_id - 421420) && (i < stop); i++) {
+        object_t **ref = ((object_t **)(myobj + 1)) + i;
+        visit(ref);
+    }
+}
+
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+                                  uintptr_t offset_itemsize[2])
+{
+    struct myobj_s *myobj = (struct myobj_s*)obj;
+    if (myobj->type_id < 421420) {
+        offset_itemsize[0] = SIZEOF_MYOBJ;
+        offset_itemsize[1] = 1;
+    }
+    else {
+        offset_itemsize[0] = sizeof(struct myobj_s);
+        offset_itemsize[1] = sizeof(object_t *);
+    }
+}
+
 long current_segment_num(void)
 {
     return STM_SEGMENT->segment_num;
@@ -347,6 +399,11 @@
 GCFLAG_WRITE_BARRIER = lib._STM_GCFLAG_WRITE_BARRIER
 NB_SEGMENTS = lib.STM_NB_SEGMENTS
 FAST_ALLOC = lib._STM_FAST_ALLOC
+CARD_SIZE = lib._STM_CARD_SIZE # == 1 << _STM_CARD_BITS, at least 32
+CARD_CLEAR = 0
+CARD_MARKED = lib._STM_CARD_MARKED
+CARD_MARKED_OLD = lib._stm_get_transaction_read_version
+
 
 class Conflict(Exception):
     pass
@@ -506,11 +563,11 @@
         return None
     return map(lib._stm_enum_objects_pointing_to_nursery, range(count))
 
-def old_objects_with_cards():
-    count = lib._stm_count_old_objects_with_cards()
+def old_objects_with_cards_set():
+    count = lib._stm_count_old_objects_with_cards_set()
     if count < 0:
         return None
-    return map(lib._stm_enum_old_objects_with_cards, range(count))
+    return map(lib._stm_enum_old_objects_with_cards_set, range(count))
 
 def last_commit_log_entry_objs():
     lib._stm_start_enum_last_cl_entry()
diff --git a/c8/test/test_card_marking.py b/c8/test/test_card_marking.py
new file mode 100644
--- /dev/null
+++ b/c8/test/test_card_marking.py
@@ -0,0 +1,376 @@
+from support import *
+import py
+
+
+get_card_value = lib._stm_get_card_value
+

