[pypy-commit] stmgc default: Merge c7-more-segments. It seems to work, including in a pypy. There

arigo noreply at buildbot.pypy.org
Sun Mar 16 19:46:27 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r1056:1e46b1ad2e26
Date: 2014-03-16 19:45 +0100
http://bitbucket.org/pypy/stmgc/changeset/1e46b1ad2e26/

Log:	Merge c7-more-segments. It seems to work, including in a pypy.
	There is a very small but measurable overhead when compared to the
	previous version, probably because it makes a bit more copies for
	now, but I think it's ok.

diff --git a/c7/demo/demo2.c b/c7/demo/demo2.c
--- a/c7/demo/demo2.c
+++ b/c7/demo/demo2.c
@@ -6,6 +6,7 @@
 
 #include "stmgc.h"
 
+#define NTHREADS    3
 #define LIST_LENGTH 2000
 #define BUNCH       100
 
@@ -223,7 +224,7 @@
 
 int main(void)
 {
-    int status;
+    int status, i;
 
     status = sem_init(&done, 0, 0); assert(status == 0);
 
@@ -233,11 +234,13 @@
 
     setup_list();
 
-    newthread(demo2, (void*)1);
-    newthread(demo2, (void*)2);
+    for (i = 1; i <= NTHREADS; i++) {
+        newthread(demo2, (void*)(uintptr_t)i);
+    }
 
-    status = sem_wait(&done); assert(status == 0);
-    status = sem_wait(&done); assert(status == 0);
+    for (i = 1; i <= NTHREADS; i++) {
+        status = sem_wait(&done); assert(status == 0);
+    }
 
     final_check();
 
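(Each worker is expected to post the 'done' semaphore exactly once when it
finishes, which is why main() now waits NTHREADS times.  A minimal sketch of
the worker side, assuming demo2() keeps that structure; the body shown here
is illustrative, not part of this diff:

    void *demo2(void *arg)
    {
        long thread_num = (long)(uintptr_t)arg;   /* 1..NTHREADS */
        /* ... run the transactional workload ... */
        int status = sem_post(&done);   /* signal main() once per thread */
        assert(status == 0);
        return NULL;
    }
)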
diff --git a/c7/demo/demo_random.c b/c7/demo/demo_random.c
--- a/c7/demo/demo_random.c
+++ b/c7/demo/demo_random.c
@@ -385,7 +385,7 @@
     for (i = 0; i < PREBUILT_ROOTS; i++) {
         void* new_templ = malloc(sizeof(struct node_s));
         memcpy(new_templ, &prebuilt_template, sizeof(struct node_s));
-        prebuilt_roots[i] = stm_setup_prebuilt((objptr_t)new_templ);
+        prebuilt_roots[i] = stm_setup_prebuilt((objptr_t)(long)new_templ);
 
         if (i % 2 == 0) {
             int hash = i + 5;
diff --git a/c7/stm/contention.c b/c7/stm/contention.c
--- a/c7/stm/contention.c
+++ b/c7/stm/contention.c
@@ -235,7 +235,7 @@
     uint8_t prev_owner = ((volatile uint8_t *)write_locks)[lock_idx];
     if (prev_owner != 0 && prev_owner != STM_PSEGMENT->write_lock_num) {
 
-        uint8_t other_segment_num = prev_owner - 1;
+        uint8_t other_segment_num = prev_owner;
         assert(get_priv_segment(other_segment_num)->write_lock_num ==
                prev_owner);
 
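(Worked example of the change above: setup.c now sets write_lock_num to the
segment number itself, `pr->write_lock_num = i;` with i in 1..NB_SEGMENTS, so
a stored owner of, say, prev_owner == 3 directly names segment 3.  The old
`- 1` adjustment compensated for the former `write_lock_num = i + 1` on
0-based segment numbers.)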
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -8,11 +8,34 @@
     memset(write_locks, 0, sizeof(write_locks));
 }
 
+static void check_flag_write_barrier(object_t *obj)
+{
+    /* check that all copies of the object, apart from mine, have the
+       GCFLAG_WRITE_BARRIER.  (a bit messy because it's possible that we
+       read a page in the middle of privatization by another thread)
+    */
+#ifndef NDEBUG
+    long i;
+    struct object_s *o1;
+    for (i = 0; i <= NB_SEGMENTS; i++) {
+        if (i == STM_SEGMENT->segment_num)
+            continue;
+        o1 = (struct object_s *)REAL_ADDRESS(get_segment_base(i), obj);
+        if (!(o1->stm_flags & GCFLAG_WRITE_BARRIER)) {
+            mutex_pages_lock();  /* try again... */
+            if (!(o1->stm_flags & GCFLAG_WRITE_BARRIER))
+                stm_fatalerror("missing GCFLAG_WRITE_BARRIER");
+            mutex_pages_unlock();
+        }
+    }
+#endif
+}
 
 void _stm_write_slowpath(object_t *obj)
 {
     assert(_seems_to_be_running_transaction());
     assert(!_is_young(obj));
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
 
     /* is this an object from the same transaction, outside the nursery? */
     if ((obj->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) ==
@@ -29,6 +52,8 @@
        safepoints that may be issued in write_write_contention_management(). */
     stm_read(obj);
 
+    /* XXX XXX XXX make the logic of write-locking objects optional! */
+
     /* claim the write-lock for this object.  In case we've been running
        the same transaction for a long while, the object can already be
        in 'modified_old_objects' (but, because it had GCFLAG_WRITE_BARRIER,
@@ -59,12 +84,12 @@
            the common case. Otherwise, we need to compute it based on
            its location and size. */
         if ((obj->stm_flags & GCFLAG_SMALL_UNIFORM) != 0) {
-            pages_privatize(first_page, 1, true);
+            page_privatize(first_page);
         }
         else {
             char *realobj;
             size_t obj_size;
-            uintptr_t end_page;
+            uintptr_t i, end_page;
 
             /* get the size of the object */
             realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
@@ -73,7 +98,9 @@
             /* that's the page *following* the last page with the object */
             end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL;
 
-            pages_privatize(first_page, end_page - first_page, true);
+            for (i = first_page; i < end_page; i++) {
+                page_privatize(i);
+            }
         }
     }
     else if (write_locks[lock_idx] == lock_num) {
@@ -100,19 +127,21 @@
         LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
     }
 
-    /* add the write-barrier-already-called flag ONLY if we succeeded in
+    /* check that we really have a private page */
+    assert(is_private_page(STM_SEGMENT->segment_num,
+                           ((uintptr_t)obj) / 4096));
+
+    /* check that so far all copies of the object have the flag */
+    check_flag_write_barrier(obj);
+
+    /* remove GCFLAG_WRITE_BARRIER, but only if we succeeded in
        getting the write-lock */
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
     obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
 
-    /* for sanity, check that all other segment copies of this object
-       still have the flag */
-    long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
-        if (i != STM_SEGMENT->segment_num)
-            assert(((struct object_s *)REAL_ADDRESS(get_segment_base(i), obj))
-                   ->stm_flags & GCFLAG_WRITE_BARRIER);
-    }
+    /* for sanity, check again that all other segment copies of this
+       object still have the flag (so privatization worked) */
+    check_flag_write_barrier(obj);
 }
 
 static void reset_transaction_read_version(void)
@@ -193,59 +222,130 @@
 
 /************************************************************/
 
-#if NB_SEGMENTS != 2
-# error "The logic in the functions below only works with two segments"
-#endif
 
 static bool detect_write_read_conflicts(void)
 {
-    long remote_num = 1 - STM_SEGMENT->segment_num;
-    char *remote_base = get_segment_base(remote_num);
-    uint8_t remote_version = get_segment(remote_num)->transaction_read_version;
+    /* Detect conflicts of the form: we want to commit a write to an object,
+       but the same object was also read in a different thread.
+    */
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
 
-    if (get_priv_segment(remote_num)->transaction_state == TS_NONE)
-        return false;    /* no need to check */
+        if (i == STM_SEGMENT->segment_num)
+            continue;
 
-    if (is_aborting_now(remote_num))
-        return false;    /* no need to check: is pending immediate abort */
+        if (get_priv_segment(i)->transaction_state == TS_NONE)
+            continue;    /* no need to check */
 
-    LIST_FOREACH_R(
-        STM_PSEGMENT->modified_old_objects,
-        object_t * /*item*/,
-        ({
-            if (was_read_remote(remote_base, item, remote_version)) {
-                /* A write-read conflict! */
-                write_read_contention_management(remote_num);
+        if (is_aborting_now(i))
+            continue;    /* no need to check: is pending immediate abort */
 
-                /* If we reach this point, we didn't abort, but maybe we
-                   had to wait for the other thread to commit.  If we
-                   did, then we have to restart committing from our call
-                   to synchronize_all_threads(). */
-                return true;
-            }
-        }));
+        char *remote_base = get_segment_base(i);
+        uint8_t remote_version = get_segment(i)->transaction_read_version;
+
+        LIST_FOREACH_R(
+            STM_PSEGMENT->modified_old_objects,
+            object_t * /*item*/,
+            ({
+                if (was_read_remote(remote_base, item, remote_version)) {
+                    /* A write-read conflict! */
+                    write_read_contention_management(i);
+
+                    /* If we reach this point, we didn't abort, but maybe we
+                       had to wait for the other thread to commit.  If we
+                       did, then we have to restart committing from our call
+                       to synchronize_all_threads(). */
+                    return true;
+                }
+            }));
+    }
 
     return false;
 }
 
-static void synchronize_overflow_object_now(object_t *obj)
+static void copy_object_to_shared(object_t *obj, int source_segment_num)
 {
+    /* Only used by major GC.  XXX There is a lot of code duplication
+       with synchronize_object_now(), but I don't completely see how to
+       improve it...
+    */
+    assert(_has_mutex_pages());
     assert(!_is_young(obj));
-    assert((obj->stm_flags & GCFLAG_SMALL_UNIFORM) == 0);
+
+    char *segment_base = get_segment_base(source_segment_num);
+    uintptr_t start = (uintptr_t)obj;
+    uintptr_t first_page = start / 4096UL;
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(segment_base, obj);
+
+    if (realobj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+        abort();//XXX WRITE THE FAST CASE
+    }
+    else {
+        ssize_t obj_size = stmcb_size_rounded_up(realobj);
+        assert(obj_size >= 16);
+        uintptr_t end = start + obj_size;
+        uintptr_t last_page = (end - 1) / 4096UL;
+
+        for (; first_page <= last_page; first_page++) {
+
+            /* Copy the object into the shared page, if needed */
+            if (is_private_page(source_segment_num, first_page)) {
+
+                uintptr_t copy_size;
+                if (first_page == last_page) {
+                    /* this is the final fragment */
+                    copy_size = end - start;
+                }
+                else {
+                    /* this is a non-final fragment, going up to the
+                       page's end */
+                    copy_size = 4096 - (start & 4095);
+                }
+                /* double-check that the result fits in one page */
+                assert(copy_size > 0);
+                assert(copy_size + (start & 4095) <= 4096);
+
+                char *src = REAL_ADDRESS(segment_base, start);
+                char *dst = REAL_ADDRESS(stm_object_pages, start);
+                if (copy_size == 4096)
+                    pagecopy(dst, src);
+                else
+                    memcpy(dst, src, copy_size);
+            }
+
+            start = (start + 4096) & ~4095;
+        }
+    }
+}
+
+static void synchronize_object_now(object_t *obj)
+{
+    /* Copy around the version of 'obj' that lives in our own segment.
+       It is first copied into the shared pages, and then into other
+       segments' own private pages.
+
+       This must be called with the mutex_pages_lock!
+    */
+    assert(_has_mutex_pages());
+    assert(!_is_young(obj));
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
 
-    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
-    ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
-    assert(obj_size >= 16);
     uintptr_t start = (uintptr_t)obj;
-    uintptr_t end = start + obj_size;
     uintptr_t first_page = start / 4096UL;
-    uintptr_t last_page = (end - 1) / 4096UL;
 
-    do {
-        if (flag_page_private[first_page] != SHARED_PAGE) {
-            /* The page is a PRIVATE_PAGE.  We need to diffuse this fragment
-               of our object from our own segment to all other segments. */
+    if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+        abort();//XXX WRITE THE FAST CASE
+    }
+    else {
+        char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+        ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
+        assert(obj_size >= 16);
+        uintptr_t end = start + obj_size;
+        uintptr_t last_page = (end - 1) / 4096UL;
+        long i, myself = STM_SEGMENT->segment_num;
+
+        for (; first_page <= last_page; first_page++) {
 
             uintptr_t copy_size;
             if (first_page == last_page) {
@@ -253,26 +353,50 @@
                 copy_size = end - start;
             }
             else {
-                /* this is a non-final fragment, going up to the page's end */
+                /* this is a non-final fragment, going up to the
+                   page's end */
                 copy_size = 4096 - (start & 4095);
             }
-
             /* double-check that the result fits in one page */
             assert(copy_size > 0);
             assert(copy_size + (start & 4095) <= 4096);
 
-            long i;
+            /* First copy the object into the shared page, if needed */
             char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start);
-            for (i = 0; i < NB_SEGMENTS; i++) {
-                if (i != STM_SEGMENT->segment_num) {
-                    char *dst = REAL_ADDRESS(get_segment_base(i), start);
+            char *dst = REAL_ADDRESS(stm_object_pages, start);
+            if (is_private_page(myself, first_page)) {
+                if (copy_size == 4096)
+                    pagecopy(dst, src);
+                else
                     memcpy(dst, src, copy_size);
+            }
+            else {
+                assert(memcmp(dst, src, copy_size) == 0);  /* same page */
+            }
+
+            for (i = 1; i <= NB_SEGMENTS; i++) {
+                if (i == myself)
+                    continue;
+
+                src = REAL_ADDRESS(stm_object_pages, start);
+                dst = REAL_ADDRESS(get_segment_base(i), start);
+                if (is_private_page(i, first_page)) {
+                    /* The page is a private page.  We need to diffuse this
+                       fragment of object from the shared page to this private
+                       page. */
+                    if (copy_size == 4096)
+                        pagecopy(dst, src);
+                    else
+                        memcpy(dst, src, copy_size);
+                }
+                else {
+                    assert(memcmp(dst, src, copy_size) == 0);  /* same page */
                 }
             }
+
+            start = (start + 4096) & ~4095;
         }
-
-        start = (start + 4096) & ~4095;
-    } while (first_page++ < last_page);
+    }
 }
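(The per-page fragment arithmetic above is worth a concrete example, using
hypothetical addresses.  Take an object at start = 0x15FF0 with obj_size =
48, so end = 0x16020, first_page = 0x15 and last_page = 0x16:

    page 0x15 (non-final):  copy_size = 4096 - (0x15FF0 & 4095)
                                      = 4096 - 4080 = 16 bytes
    start is then rounded up: (0x15FF0 + 4096) & ~4095 = 0x16000
    page 0x16 (final):      copy_size = end - start
                                      = 0x16020 - 0x16000 = 32 bytes

The two fragments add up to the 48-byte object, and each copy stays within a
single 4096-byte page, as the asserts check.)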
 
 static void push_overflow_objects_from_privatized_pages(void)
@@ -281,27 +405,15 @@
         return;
 
     LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
-                   synchronize_overflow_object_now(item));
+                   synchronize_object_now(item));
 }
 
 static void push_modified_to_other_segments(void)
 {
-    long remote_num = 1 - STM_SEGMENT->segment_num;
-    char *local_base = STM_SEGMENT->segment_base;
-    char *remote_base = get_segment_base(remote_num);
-    bool remote_active =
-        (get_priv_segment(remote_num)->transaction_state != TS_NONE &&
-         get_segment(remote_num)->nursery_end != NSE_SIGABORT);
-
     LIST_FOREACH_R(
         STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
         ({
-            if (remote_active) {
-                assert(!was_read_remote(remote_base, item,
-                    get_segment(remote_num)->transaction_read_version));
-            }
-
             /* clear the write-lock (note that this runs with all other
                threads paused, so no need to be careful about ordering) */
             uintptr_t lock_idx = (((uintptr_t)item) >> 4) - WRITELOCK_START;
@@ -313,11 +425,9 @@
                minor_collection() */
             assert((item->stm_flags & GCFLAG_WRITE_BARRIER) != 0);
 
-            /* copy the modified object to the other segment */
-            char *src = REAL_ADDRESS(local_base, item);
-            char *dst = REAL_ADDRESS(remote_base, item);
-            ssize_t size = stmcb_size_rounded_up((struct object_s *)src);
-            memcpy(dst, src, size);
+            /* copy the object to the shared page, and to the other
+               private pages as needed */
+            synchronize_object_now(item);
         }));
 
     list_clear(STM_PSEGMENT->modified_old_objects);
@@ -368,10 +478,12 @@
         major_collection_now_at_safe_point();
 
     /* synchronize overflow objects living in privatized pages */
+    mutex_pages_lock();
     push_overflow_objects_from_privatized_pages();
 
     /* synchronize modified old objects to other threads */
     push_modified_to_other_segments();
+    mutex_pages_unlock();
 
     /* update 'overflow_number' if needed */
     if (STM_PSEGMENT->overflow_number_has_been_used) {
@@ -406,19 +518,17 @@
 static void
 reset_modified_from_other_segments(int segment_num)
 {
-    /* pull the right versions from other threads in order
+    /* pull the right versions from segment 0 in order
        to reset our pages as part of an abort.
 
        Note that this function is also sometimes called from
        contention.c to clean up the state of a different thread,
        when we would really like it to be aborted now and it is
        suspended at a safe-point.
-
     */
     struct stm_priv_segment_info_s *pseg = get_priv_segment(segment_num);
-    long remote_num = !segment_num;
     char *local_base = get_segment_base(segment_num);
-    char *remote_base = get_segment_base(remote_num);
+    char *remote_base = get_segment_base(0);
 
     LIST_FOREACH_R(
         pseg->modified_old_objects,
@@ -489,6 +599,7 @@
 
 static void abort_with_mutex(void)
 {
+    assert(_has_mutex());
     dprintf(("~~~ ABORT\n"));
 
     switch (STM_PSEGMENT->transaction_state) {
@@ -514,8 +625,11 @@
     /* invoke the callbacks */
     invoke_and_clear_callbacks_on_abort();
 
-    if (STM_SEGMENT->nursery_end == NSE_SIGABORT)
-        STM_SEGMENT->nursery_end = NURSERY_END;   /* done aborting */
+    if (STM_SEGMENT->nursery_end == NSE_SIGABORT) {
+        /* done aborting */
+        STM_SEGMENT->nursery_end = pause_signalled ? NSE_SIGPAUSE
+                                                   : NURSERY_END;
+    }
 
     _finish_transaction();
     /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -15,12 +15,12 @@
 
 
 #define NB_PAGES            (1500*256)    // 1500MB
-#define NB_SEGMENTS         2
+#define NB_SEGMENTS         STM_NB_SEGMENTS
 #define NB_SEGMENTS_MAX     240    /* don't increase NB_SEGMENTS past this */
 #define MAP_PAGES_FLAGS     (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE)
 #define NB_NURSERY_PAGES    (STM_GC_NURSERY/4)
 
-#define TOTAL_MEMORY          (NB_PAGES * 4096UL * NB_SEGMENTS)
+#define TOTAL_MEMORY          (NB_PAGES * 4096UL * (1 + NB_SEGMENTS))
 #define READMARKER_END        ((NB_PAGES * 4096UL) >> 4)
 #define FIRST_OBJECT_PAGE     ((READMARKER_END + 4095) / 4096UL)
 #define FIRST_NURSERY_PAGE    FIRST_OBJECT_PAGE
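(With the defaults in this merge the reservation works out as follows; this
is virtual address space only, since the mmap uses MAP_NORESERVE:

    NB_PAGES     = 1500*256 = 384000 pages  = 1500MB per segment
    NB_SEGMENTS  = STM_NB_SEGMENTS = 4        (defined in stmgc.h below)
    TOTAL_MEMORY = 1500MB * (1 + 4) = 7500MB

The '1 +' accounts for segment 0, which no longer runs transactions and
instead holds only the shared copies of the pages.)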
@@ -178,10 +178,6 @@
 static char *stm_object_pages;
 static stm_thread_local_t *stm_all_thread_locals = NULL;
 
-#ifdef STM_TESTS
-static char *stm_other_pages;
-#endif
-
 static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START];
 
 
@@ -228,4 +224,5 @@
     asm("/* workaround for llvm bug */");
 }
 
-static void synchronize_overflow_object_now(object_t *obj);
+static void copy_object_to_shared(object_t *obj, int source_segment_num);
+static void synchronize_object_now(object_t *obj);
diff --git a/c7/stm/fprintcolor.h b/c7/stm/fprintcolor.h
--- a/c7/stm/fprintcolor.h
+++ b/c7/stm/fprintcolor.h
@@ -9,7 +9,7 @@
 #define dprintf(args)   threadcolor_printf args
 static inline int dprintfcolor(void)
 {
-    return 31 + STM_SEGMENT->segment_num % 6;
+    return 31 + (STM_SEGMENT->segment_num + 5) % 6;
 }
 
 static int threadcolor_printf(const char *format, ...)
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -192,129 +192,121 @@
 
 /************************************************************/
 
+static uintptr_t object_last_page(object_t *obj)
+{
+    uintptr_t lastbyte;
+    struct object_s *realobj =
+        (struct object_s *)REAL_ADDRESS(stm_object_pages, obj);
 
-static inline void mark_single_flag_private(uintptr_t pagenum)
-{
-    if (flag_page_private[pagenum] == PRIVATE_PAGE) {
-        assert(pagenum >= END_NURSERY_PAGE);
-        assert(pagenum < NB_PAGES);
-        flag_page_private[pagenum] = SEGMENT1_PAGE;
+    if (realobj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+        lastbyte = (uintptr_t)obj;
     }
     else {
-        assert(flag_page_private[pagenum] == SHARED_PAGE ||
-               flag_page_private[pagenum] == SEGMENT1_PAGE);
+        /* get the size of the object */
+        size_t obj_size = stmcb_size_rounded_up(realobj);
+
+        /* that's the last byte within the object */
+        lastbyte = ((uintptr_t)obj) + obj_size - 1;
     }
+    return lastbyte / 4096UL;
 }
 
-static inline void mark_flag_page_private(object_t *obj, char *segment_base)
+/* A macro that expands to: run the 'expression' once for every page
+   touched by an object in the 'modified_old_objects' list.
+*/
+#define BITOP(expression)                                       \
+    LIST_FOREACH_R(                                             \
+        get_priv_segment(segment_num)->modified_old_objects,    \
+        object_t * /* item */,                                  \
+        ({                                                      \
+            struct page_shared_s *ps;                           \
+            uintptr_t pagenum = ((uintptr_t)item) / 4096UL;     \
+            uintptr_t count = object_last_page(item) - pagenum; \
+            ps = &pages_privatized[pagenum - PAGE_FLAG_START];  \
+            do {                                                \
+                expression;                                     \
+                ps++;                                           \
+            } while (count--);                                  \
+        }));
+
+static void major_hide_private_bits_for_modified_objects(long segment_num)
 {
-    uintptr_t first_page = ((uintptr_t)obj) / 4096UL;
-
-    if (LIKELY((obj->stm_flags & GCFLAG_SMALL_UNIFORM) != 0)) {
-        mark_single_flag_private(first_page);
-    }
-    else {
-        char *realobj;
-        size_t obj_size;
-        uintptr_t end_page;
-
-        /* get the size of the object */
-        realobj = REAL_ADDRESS(segment_base, obj);
-        obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
-
-        /* that's the page *following* the last page with the object */
-        end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL;
-
-        while (first_page < end_page)
-            mark_single_flag_private(first_page++);
-    }
+    uint64_t negativebitmask = ~(1 << (segment_num - 1));
+#ifndef NDEBUG
+    BITOP(assert((ps->by_segment & negativebitmask) != ps->by_segment));
+#endif
+    BITOP(ps->by_segment &= negativebitmask);
 }
 
-static void major_reshare_pages_range(uintptr_t first_page, uintptr_t end_page)
+static void major_restore_private_bits_for_modified_objects(long segment_num)
 {
-    uintptr_t i;
-    for (i = first_page; i < end_page; i++) {
+    uint64_t positivebitmask = 1 << (segment_num - 1);
+    BITOP(ps->by_segment |= positivebitmask);
+}
 
-        switch (flag_page_private[i]) {
-
-        case SEGMENT1_PAGE:
-            /* this page stays private after major collection */
-            flag_page_private[i] = PRIVATE_PAGE;
-            break;
-
-        case PRIVATE_PAGE:;
-            /* this page becomes shared again.  No object in it was
-               traced belonging to a segment other than 0.
-
-               XXX This is maybe a too-strict condition, but the more
-               general condition "all traced objects belong to the same
-               segment" has problems with large objects in segments > 0.
-               More precisely: we'd need to keep in the shared page the
-               content of the objects (from segment > 0), but also the
-               largemalloc's chunk data (stored in segment 0).
-            */
-#if NB_SEGMENTS != 2
-#  error "limited to NB_SEGMENTS == 2"
-#endif
-            char *ppage0 = get_segment_base(0) + i * 4096;
-            char *ppage1 = get_segment_base(1) + i * 4096;
-
-            /* two cases for mapping pages to file-pages (fpages):
-                - (0->0, 1->1)
-                - (0->1, 1->0)
-               Distinguish which case it is by hacking a lot */
-
-            // 0->0,1->1 or 0->1,1->0
-            /* map page 1 to fpage 0: */
-            d_remap_file_pages(ppage1, 4096, i);
-            // 0->0,1->0 or 0->1,1->0
-
-            char oldvalue0 = *ppage0;
-            char oldvalue1 = *ppage1;
-            asm("":::"memory");
-            *ppage0 = 1 + oldvalue1;
-            asm("":::"memory");
-            char newvalue1 = *ppage1;
-            asm("":::"memory");
-            *ppage0 = oldvalue0;
-            /* if we are in 0->0,1->0, old and new are different:
-               In this case we are done. We keep the largemalloc
-               data structure and objects of ppage0/fpage0 */
-            if (oldvalue1 == newvalue1) {
-                // 0->1,1->0
-                /* ppage0/fpage1 has the data structure that we want
-                   in ppage1/fpage0, so we copy it */
-                pagecopy(ppage1, ppage0);   // copy from page0 to page1,
-                //         i.e. from the underlying memory seg1 to seg0
-                d_remap_file_pages(ppage0, 4096, i);
-                // 0->0,1->0
-            }
-            flag_page_private[i] = SHARED_PAGE;
-
-            increment_total_allocated(-4096 * (NB_SEGMENTS-1));
-            break;
-
-        case SHARED_PAGE:
-            break;     /* stay shared */
-
-        default:
-            assert(!"unexpected flag_page_private");
-        }
-    }
-}
+#undef BITOP
 
 static void major_reshare_pages(void)
 {
     /* re-share pages if possible.  Each re-sharing decreases
        total_allocated by 4096. */
-    major_reshare_pages_range(
-        END_NURSERY_PAGE,       /* not the nursery! */
-        (uninitialized_page_start - stm_object_pages) / 4096UL);
-    major_reshare_pages_range(
-        (uninitialized_page_stop - stm_object_pages) / 4096UL,
-        NB_PAGES);
+
+    long i;
+    mutex_pages_lock();
+
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        /* The 'modified_old_objects' list gives the list of objects
+           whose pages need to remain private.  We temporarily remove
+           these bits from 'pages_privatized', so that these pages will
+           be skipped by the loop below (and by copy_object_to_shared()).
+        */
+        major_hide_private_bits_for_modified_objects(i);
+
+        /* For each segment, push the current overflow objects from
+           private pages to the corresponding shared pages, if
+           necessary.  The pages that we will re-share must contain this
+           data; otherwise, it would exist only in the private pages,
+           and get lost in the loop below.
+        */
+        struct list_s *lst = get_priv_segment(i)->large_overflow_objects;
+        if (lst != NULL) {
+            LIST_FOREACH_R(lst, object_t *, copy_object_to_shared(item, i));
+        }
+    }
+
+    /* Now loop over all pages that are still in 'pages_privatized',
+       and re-share them.
+     */
+    uintptr_t pagenum, endpagenum;
+    pagenum = END_NURSERY_PAGE;   /* starts after the nursery */
+    endpagenum = (uninitialized_page_start - stm_object_pages) / 4096UL;
+
+    while (1) {
+        if (UNLIKELY(pagenum == endpagenum)) {
+            /* we reach this point usually twice, because there are
+               more pages after 'uninitialized_page_stop' */
+            if (endpagenum == NB_PAGES)
+                break;   /* done */
+            pagenum = (uninitialized_page_stop - stm_object_pages) / 4096UL;
+            endpagenum = NB_PAGES;
+            if (pagenum == endpagenum)
+                break;   /* no pages in the 2nd section, so done too */
+        }
+
+        page_check_and_reshare(pagenum);
+        pagenum++;
+    }
+
+    /* Done.  Now 'pages_privatized' should be entirely zeroes.  Restore
+       the previously-hidden bits
+    */
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        major_restore_private_bits_for_modified_objects(i);
+    }
+    mutex_pages_unlock();
 }
 
+
 /************************************************************/
 
 
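(The hide/restore pair above just clears and re-sets one segment's bit in
the per-page bitmap; a sketch using the names from pages.h, shown further
down in this diff:

    uint64_t mask = 1UL << (segment_num - 1);  /* segment N owns bit N-1 */
    ps->by_segment &= ~mask;  /* hide: page_reshare() leaves this
                                 segment's private copy alone */
    /* ... major_reshare_pages() runs page_check_and_reshare() ... */
    ps->by_segment |= mask;   /* restore: the page counts as private
                                 again after the collection */
)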
@@ -323,11 +315,6 @@
     /* takes a normal pointer to a thread-local pointer to an object */
     object_t *obj = *pobj;
 
-    if (obj == NULL || mark_visited_test_and_set(obj))
-        return;    /* already visited this object */
-
-    LIST_APPEND(mark_objects_to_trace, obj);
-
     /* Note: this obj might be visited already, but from a different
        segment.  We ignore this case and skip re-visiting the object
        anyway.  The idea is that such an object is old (not from the
@@ -338,6 +325,10 @@
        segments and only needs visiting once.  (It may actually be in a
        shared page, or maybe not.)
     */
+    if (obj == NULL || mark_visited_test_and_set(obj))
+        return;    /* already visited this object */
+
+    LIST_APPEND(mark_objects_to_trace, obj);
 }
 
 static void mark_trace(object_t *obj, char *segment_base)
@@ -345,13 +336,6 @@
     assert(list_is_empty(mark_objects_to_trace));
 
     while (1) {
-
-        /* first, if we're not seeing segment 0, we must change the
-           flags in flag_page_private[] from PRIVATE_PAGE to
-           SEGMENT1_PAGE, which will mean "can't re-share" */
-        if (segment_base != stm_object_pages && RESHARE_PAGES)
-            mark_flag_page_private(obj, segment_base);
-
         /* trace into the object (the version from 'segment_base') */
         struct object_s *realobj =
             (struct object_s *)REAL_ADDRESS(segment_base, obj);
@@ -373,45 +357,33 @@
 
 static void mark_visit_from_roots(void)
 {
-
     if (testing_prebuilt_objs != NULL) {
         LIST_FOREACH_R(testing_prebuilt_objs, object_t * /*item*/,
-                       mark_visit_object(item, get_segment_base(0)));
+                       mark_visit_object(item, stm_object_pages));
     }
 
-    /* Do the following twice, so that we trace first the objects from
-       segment 0, and then all others.  XXX This is a hack to make it
-       more likely that we'll be able to re-share pages. */
+    stm_thread_local_t *tl = stm_all_thread_locals;
+    do {
+        /* If 'tl' is currently running, its 'associated_segment_num'
+           field is the segment number that contains the correct
+           version of its overflowed objects.  If not, then the
+           field is still some correct segment number, and it doesn't
+           matter which one we pick. */
+        char *segment_base = get_segment_base(tl->associated_segment_num);
 
-    int must_be_zero;
-    for (must_be_zero = 1; must_be_zero >= 0; must_be_zero--) {
+        struct stm_shadowentry_s *current = tl->shadowstack;
+        struct stm_shadowentry_s *base = tl->shadowstack_base;
+        while (current-- != base) {
+            assert(current->ss != (object_t *)-1);
+            mark_visit_object(current->ss, segment_base);
+        }
+        mark_visit_object(tl->thread_local_obj, segment_base);
 
-        stm_thread_local_t *tl = stm_all_thread_locals;
-        do {
-            /* If 'tl' is currently running, its 'associated_segment_num'
-               field is the segment number that contains the correct
-               version of its overflowed objects.  If not, then the
-               field is still some correct segment number, and it doesn't
-               matter which one we pick. */
-            char *segment_base = get_segment_base(tl->associated_segment_num);
-
-            if (must_be_zero == (segment_base == get_segment_base(0))) {
-
-                struct stm_shadowentry_s *current = tl->shadowstack;
-                struct stm_shadowentry_s *base = tl->shadowstack_base;
-                while (current-- != base) {
-                    assert(current->ss != (object_t *)-1);
-                    mark_visit_object(current->ss, segment_base);
-                }
-                mark_visit_object(tl->thread_local_obj, segment_base);
-            }
-
-            tl = tl->next;
-        } while (tl != stm_all_thread_locals);
-    }
+        tl = tl->next;
+    } while (tl != stm_all_thread_locals);
 
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         if (get_priv_segment(i)->transaction_state != TS_NONE)
             mark_visit_object(
                 get_priv_segment(i)->threadlocal_at_start_of_transaction,
@@ -422,20 +394,21 @@
 static void mark_visit_from_modified_objects(void)
 {
     /* The modified objects are the ones that may exist in two different
-       versions: one in the segment that modified it, and another in
-       all other segments. */
+       versions: one in the segment that modified it, and another in all
+       other segments.  (It can also be more than two if we don't have
+       eager write locking.)
+    */
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
-        char *base1 = get_segment_base(i);   /* two different segments */
-        char *base2 = get_segment_base(!i);
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        char *base = get_segment_base(i);
 
         LIST_FOREACH_R(
             get_priv_segment(i)->modified_old_objects,
             object_t * /*item*/,
             ({
                 mark_visited_test_and_set(item);
-                mark_trace(item, base1);
-                mark_trace(item, base2);
+                mark_trace(item, stm_object_pages);  /* shared version */
+                mark_trace(item, base);              /* private version */
             }));
     }
 }
@@ -443,7 +416,7 @@
 static void clean_up_segment_lists(void)
 {
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(i);
         struct list_s *lst;
 
@@ -489,7 +462,9 @@
 
 static void sweep_large_objects(void)
 {
+    mutex_pages_lock();
     _stm_largemalloc_sweep();
+    mutex_pages_unlock();
 }
 
 static void clean_write_locks(void)
@@ -505,11 +480,11 @@
     memset(write_locks + lock2_idx, 0, sizeof(write_locks) - lock2_idx);
 }
 
-static void major_set_write_locks(void)
+static void major_restore_write_locks(void)
 {
     /* restore the write locks on the modified objects */
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(i);
 
         LIST_FOREACH_R(
@@ -535,6 +510,10 @@
     dprintf((" | used before collection: %ld\n",
              (long)pages_ctl.total_allocated));
 
+    /* reshare pages */
+    if (RESHARE_PAGES)
+        major_reshare_pages();
+
     /* marking */
     LIST_CREATE(mark_objects_to_trace);
     mark_visit_from_modified_objects();
@@ -548,15 +527,11 @@
     clean_up_segment_lists();
 
     /* sweeping */
-    mutex_pages_lock();
-    if (RESHARE_PAGES)
-        major_reshare_pages();
     sweep_large_objects();
     //sweep_uniform_pages();
-    mutex_pages_unlock();
 
     clean_write_locks();
-    major_set_write_locks();
+    major_restore_write_locks();
 
     dprintf((" | used after collection:  %ld\n",
              (long)pages_ctl.total_allocated));
diff --git a/c7/stm/gcpage.h b/c7/stm/gcpage.h
--- a/c7/stm/gcpage.h
+++ b/c7/stm/gcpage.h
@@ -15,7 +15,7 @@
 #define GC_MAJOR_COLLECT       1.82
 
 /* re-share pages after major collections (1 or 0) */
-#define RESHARE_PAGES 0
+#define RESHARE_PAGES 1
 
 
 
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -41,9 +41,10 @@
 }
 
 #ifdef STM_TESTS
-uint8_t _stm_get_page_flag(uintptr_t index)
+uintptr_t _stm_get_private_page(uintptr_t pagenum)
 {
-    return flag_page_private[index];
+    /* xxx returns 0 or 1 now */
+    return is_private_page(STM_SEGMENT->segment_num, pagenum);
 }
 
 long _stm_count_modified_old_objects(void)
@@ -79,4 +80,14 @@
     mutex_pages_unlock();
     return result;
 }
+
+void _stm_mutex_pages_lock(void)
+{
+    mutex_pages_lock();
+}
+
+void _stm_mutex_pages_unlock(void)
+{
+    mutex_pages_unlock();
+}
 #endif
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -26,7 +26,7 @@
     _stm_nursery_start = NURSERY_START;
 
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         get_segment(i)->nursery_current = (stm_char *)NURSERY_START;
         get_segment(i)->nursery_end = NURSERY_END;
     }
@@ -198,8 +198,11 @@
                WRITE_BARRIER flag and traced into it to fix its
                content); or add the object to 'large_overflow_objects'.
             */
-            if (STM_PSEGMENT->minor_collect_will_commit_now)
-                synchronize_overflow_object_now(obj);
+            if (STM_PSEGMENT->minor_collect_will_commit_now) {
+                mutex_pages_lock();
+                synchronize_object_now(obj);
+                mutex_pages_unlock();
+            }
             else
                 LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
         }
@@ -378,7 +381,7 @@
     _stm_nursery_start = NURSERY_END - free_count;
 
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         if ((uintptr_t)get_segment(i)->nursery_current < _stm_nursery_start)
             get_segment(i)->nursery_current = (stm_char *)_stm_nursery_start;
     }
@@ -411,7 +414,7 @@
     int original_num = STM_SEGMENT->segment_num;
     long i;
 
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(i);
         if (MINOR_NOTHING_TO_DO(pseg))  /*TS_NONE segments have NOTHING_TO_DO*/
             continue;
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -25,6 +25,7 @@
 static void teardown_pages(void)
 {
     memset(&pages_ctl, 0, sizeof(pages_ctl));
+    memset(pages_privatized, 0, sizeof(pages_privatized));
 }
 
 static void mutex_pages_lock(void)
@@ -39,7 +40,6 @@
     __sync_lock_release(&pages_ctl.mutex_pages);
 }
 
-__attribute__((unused))
 static bool _has_mutex_pages(void)
 {
     return pages_ctl.mutex_pages != 0;
@@ -47,6 +47,7 @@
 
 static uint64_t increment_total_allocated(ssize_t add_or_remove)
 {
+    assert(_has_mutex_pages());
     pages_ctl.total_allocated += add_or_remove;
 
     if (pages_ctl.total_allocated >= pages_ctl.total_allocated_bound)
@@ -102,100 +103,64 @@
        segment 0. */
     uintptr_t i;
     assert(_has_mutex_pages());
-    for (i = 1; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         char *segment_base = get_segment_base(i);
         d_remap_file_pages(segment_base + pagenum * 4096UL,
                            count * 4096UL, pagenum);
     }
-    for (i = 0; i < count; i++)
-        flag_page_private[pagenum + i] = SHARED_PAGE;
 }
 
-#if 0
-static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count)
+static void page_privatize(uintptr_t pagenum)
 {
-    /* Same as pages_initialize_shared(), but tries hard to minimize the
-       total number of pages that remap_file_pages() must handle, by
-       fragmenting calls as much as possible (the overhead of one system
-       call appears smaller as the overhead per page). */
-    uintptr_t start, i = 0;
-    while (i < count) {
-        if (flag_page_private[pagenum + (i++)] == SHARED_PAGE)
-            continue;
-        start = i;    /* first index of a private page */
-        while (1) {
-            i++;
-            if (i == count || flag_page_private[pagenum + i] == SHARED_PAGE)
-                break;
-        }
-        pages_initialize_shared(pagenum + start, i - start);
-    }
-}
-#endif
-
-static void privatize_range(uintptr_t pagenum, uintptr_t count, bool full)
-{
-    ssize_t pgoff1 = pagenum;
-    ssize_t pgoff2 = pagenum + NB_PAGES;
-    ssize_t localpgoff = pgoff1 + NB_PAGES * STM_SEGMENT->segment_num;
-    ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - STM_SEGMENT->segment_num);
-
-    void *localpg = stm_object_pages + localpgoff * 4096UL;
-    void *otherpg = stm_object_pages + otherpgoff * 4096UL;
-
-    memset(flag_page_private + pagenum, REMAPPING_PAGE, count);
-    d_remap_file_pages(localpg, count * 4096, pgoff2);
-    uintptr_t i;
-    if (full) {
-        for (i = 0; i < count; i++) {
-            pagecopy(localpg + 4096 * i, otherpg + 4096 * i);
-        }
-    }
-    else {
-        pagecopy(localpg, otherpg);
-        if (count > 1)
-            pagecopy(localpg + 4096 * (count-1), otherpg + 4096 * (count-1));
-    }
-    write_fence();
-    memset(flag_page_private + pagenum, PRIVATE_PAGE, count);
-    increment_total_allocated(4096 * count);
-}
-
-static void _pages_privatize(uintptr_t pagenum, uintptr_t count, bool full)
-{
-    /* narrow the range of pages to privatize from the end: */
-    while (flag_page_private[pagenum + count - 1] == PRIVATE_PAGE) {
-        if (!--count)
-            return;
+    if (is_private_page(STM_SEGMENT->segment_num, pagenum)) {
+        /* the page is already privatized */
+        return;
     }
 
+    /* lock, to prevent concurrent threads from looking up this thread's
+       'pages_privatized' bits in parallel */
     mutex_pages_lock();
 
-    uintptr_t page_start_range = pagenum;
-    uintptr_t pagestop = pagenum + count;
+    /* "unmaps" the page to make the address space location correspond
+       again to its underlying file offset (XXX later we should again
+       attempt to group together many calls to d_remap_file_pages() in
+       succession) */
+    uintptr_t pagenum_in_file = NB_PAGES * STM_SEGMENT->segment_num + pagenum;
+    char *new_page = stm_object_pages + pagenum_in_file * 4096UL;
+    d_remap_file_pages(new_page, 4096, pagenum_in_file);
+    increment_total_allocated(4096);
 
-    for (; pagenum < pagestop; pagenum++) {
-        uint8_t prev = flag_page_private[pagenum];
-        if (prev == PRIVATE_PAGE) {
-            if (pagenum > page_start_range) {
-                privatize_range(page_start_range,
-                                pagenum - page_start_range, full);
-            }
-            page_start_range = pagenum + 1;
-        }
-        else {
-            assert(prev == SHARED_PAGE);
-        }
-    }
+    /* copy the content from the shared (segment 0) source */
+    pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
 
-    if (pagenum > page_start_range) {
-        privatize_range(page_start_range,
-                        pagenum - page_start_range, full);
-    }
+    /* add this thread's 'pages_privatized' bit */
+    uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
+    pages_privatized[pagenum - PAGE_FLAG_START].by_segment |= bitmask;
 
     mutex_pages_unlock();
 }
 
+static void page_reshare(uintptr_t pagenum)
+{
+    struct page_shared_s ps = pages_privatized[pagenum - PAGE_FLAG_START];
+    pages_privatized[pagenum - PAGE_FLAG_START].by_segment = 0;
+
+    long j, total = 0;
+    for (j = 0; j < NB_SEGMENTS; j++) {
+        if (ps.by_segment & (1 << j)) {
+            /* Page 'pagenum' is private in segment 'j + 1'. Reshare */
+            char *segment_base = get_segment_base(j + 1);
+
+            madvise(segment_base + pagenum * 4096UL, 4096, MADV_DONTNEED);
+            d_remap_file_pages(segment_base + pagenum * 4096UL,
+                               4096, pagenum);
+            total -= 4096;
+        }
+    }
+    increment_total_allocated(total);
+}
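+
+/* The remapping arithmetic in page_privatize() is easiest to see with
+   concrete (illustrative) numbers: each segment i owns the range of file
+   pages starting at NB_PAGES * i, so privatizing page 1000 in segment 2
+   with NB_PAGES = 384000 gives
+
+       pagenum_in_file = 384000 * 2 + 1000 = 769000
+
+   d_remap_file_pages() points the address back at that file page (its
+   "natural location"), and pagecopy() then fills it from the shared copy,
+   i.e. from segment 0's range, which starts at file page 0. */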
+
+
 #if 0
 static bool is_fully_in_shared_pages(object_t *obj)
 {
diff --git a/c7/stm/pages.h b/c7/stm/pages.h
--- a/c7/stm/pages.h
+++ b/c7/stm/pages.h
@@ -1,48 +1,61 @@
 
-enum /* flag_page_private */ {
-    /* The page is not in use.  Assume that each segment sees its own copy. */
-    FREE_PAGE=0,
+/* This handles pages of objects outside the nursery.  Every page
+   has a "shared copy" and zero or more "private copies".
 
-    /* The page is shared by all segments.  Each segment sees the same
-       physical page (the one that is within the segment 0 mmap address). */
-    SHARED_PAGE,
+   The shared copy of a page is stored in the mmap at the file offset
+   corresponding to the segment 0 offset.  Initially, a page accessed
+   from segment N is remapped to the segment 0 file page.  If the page
+   is turned private, we "un-remap" it to its initial location.  The
+   'pages_privatized' global array records whether a page is currently
+   mapped to segment 0 (shared) or to its natural location (private).
 
-    /* For only one range of pages at a time, around the call to
-       remap_file_pages() that un-shares the pages (SHARED -> PRIVATE). */
-    REMAPPING_PAGE,
+   Note that this page manipulation logic uses remap_file_pages() to
+   fully hide its execution cost behind the CPU's memory management unit.
+   It should not be confused with the logic of tracking which objects
+   are old-and-committed, old-but-modified, overflow objects, and so on
+   (which works at the object granularity, not the page granularity).
+*/
 
-    /* Page is private for each segment. */
-    PRIVATE_PAGE,
+#define PAGE_FLAG_START   END_NURSERY_PAGE
+#define PAGE_FLAG_END     NB_PAGES
 
-    /* gcpage.c: page contains objects that have been traced in the
-       segment > 0 */
-    SEGMENT1_PAGE,
+struct page_shared_s {
+#if NB_SEGMENTS <= 8
+    uint8_t by_segment;
+#elif NB_SEGMENTS <= 16
+    uint16_t by_segment;
+#elif NB_SEGMENTS <= 32
+    uint32_t by_segment;
+#elif NB_SEGMENTS <= 64
+    uint64_t by_segment;
+#else
+#   error "NB_SEGMENTS > 64 not supported right now"
+#endif
 };
 
-static uint8_t flag_page_private[NB_PAGES];
+static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
 
-static void _pages_privatize(uintptr_t pagenum, uintptr_t count, bool full);
 static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
-//static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count);
+static void page_privatize(uintptr_t pagenum);
+static void page_reshare(uintptr_t pagenum);
 
 static void mutex_pages_lock(void);
 static void mutex_pages_unlock(void);
+static bool _has_mutex_pages(void) __attribute__((unused));
 static uint64_t increment_total_allocated(ssize_t add_or_remove);
 static bool is_major_collection_requested(void);
 static void force_major_collection_request(void);
 static void reset_major_collection_requested(void);
 
-inline static void pages_privatize(uintptr_t pagenum, uintptr_t count,
-                                   bool full) {
-    /* This is written a bit carefully so that a call with a constant
-       count == 1 will turn this loop into just one "if". */
-    while (flag_page_private[pagenum] == PRIVATE_PAGE) {
-        if (!--count) {
-            return;
-        }
-        pagenum++;
-    }
-    _pages_privatize(pagenum, count, full);
+static inline bool is_private_page(long segnum, uintptr_t pagenum)
+{
+    assert(pagenum >= PAGE_FLAG_START);
+    uint64_t bitmask = 1UL << (segnum - 1);
+    return (pages_privatized[pagenum - PAGE_FLAG_START].by_segment & bitmask);
 }
 
-/* static bool is_fully_in_shared_pages(object_t *obj); */
+static inline void page_check_and_reshare(uintptr_t pagenum)
+{
+    if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
+        page_reshare(pagenum);
+}
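(A quick illustration of the bitmap encoding: segment numbers are 1-based
but bits are 0-based, so e.g. segment 3's privatization state for a page
sits in bit 2.  A hypothetical check, equivalent to is_private_page(3, 1000):

    uint64_t bits = pages_privatized[1000 - PAGE_FLAG_START].by_segment;
    bool private_in_seg3 = (bits & (1UL << (3 - 1))) != 0;
)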
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -26,12 +26,15 @@
     if (stm_object_pages == MAP_FAILED)
         stm_fatalerror("initial stm_object_pages mmap() failed: %m\n");
 
+    /* Segment 0 is not used to run transactions; it contains the
+       shared copy of the pages.  We mprotect its start so that stray
+       accesses fail, up to and including the pages that correspond to
+       the nurseries of the other segments. */
+    mprotect(stm_object_pages, END_NURSERY_PAGE * 4096UL, PROT_NONE);
+
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         char *segment_base = get_segment_base(i);
-#ifdef STM_TESTS
-        stm_other_pages = segment_base;
-#endif
 
         /* In each segment, the first page is where TLPREFIX'ed
            NULL accesses land.  We mprotect it so that accesses fail. */
@@ -39,7 +42,7 @@
 
         /* Fill the TLS page (page 1) with 0xDC, for debugging */
         memset(REAL_ADDRESS(segment_base, 4096), 0xDC, 4096);
-        /* Make a "hole" at STM_PSEGMENT */
+        /* Make a "hole" at STM_PSEGMENT (which includes STM_SEGMENT) */
         memset(REAL_ADDRESS(segment_base, STM_PSEGMENT), 0,
                sizeof(*STM_PSEGMENT));
 
@@ -49,9 +52,10 @@
                      (FIRST_READMARKER_PAGE - 2) * 4096UL,
                      PROT_NONE);
 
+        /* Initialize STM_PSEGMENT */
         struct stm_priv_segment_info_s *pr = get_priv_segment(i);
-        assert(i + 1 < 255);   /* 255 is WL_VISITED in gcpage.c */
-        pr->write_lock_num = i + 1;
+        assert(1 <= i && i < 255);   /* 255 is WL_VISITED in gcpage.c */
+        pr->write_lock_num = i;
         pr->pub.segment_num = i;
         pr->pub.segment_base = segment_base;
         pr->objects_pointing_to_nursery = NULL;
@@ -62,7 +66,7 @@
         pr->young_outside_nursery = tree_create();
         pr->nursery_objects_shadows = tree_create();
         pr->callbacks_on_abort = tree_create();
-        pr->overflow_number = GCFLAG_OVERFLOW_NUMBER_bit0 * (i + 1);
+        pr->overflow_number = GCFLAG_OVERFLOW_NUMBER_bit0 * i;
         highest_overflow_number = pr->overflow_number;
     }
 
@@ -73,10 +77,6 @@
        STM_SEGMENT->transaction_read_version never contains zero,
        so a null read marker means "not read" whatever the
        current transaction_read_version is.
-
-       The creation markers are initially zero, which is correct:
-       it means "objects of this line of 256 bytes have not been
-       allocated by the current transaction."
     */
 
     setup_sync();
@@ -92,7 +92,7 @@
     assert(!_has_mutex());
 
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pr = get_priv_segment(i);
         assert(pr->objects_pointing_to_nursery == NULL);
         assert(pr->large_overflow_objects == NULL);
@@ -107,8 +107,6 @@
     munmap(stm_object_pages, TOTAL_MEMORY);
     stm_object_pages = NULL;
 
-    memset(flag_page_private, 0, sizeof(flag_page_private));
-
     teardown_core();
     teardown_sync();
     teardown_gcpage();
@@ -146,14 +144,14 @@
         tl->prev = stm_all_thread_locals->prev;
         stm_all_thread_locals->prev->next = tl;
         stm_all_thread_locals->prev = tl;
-        num = tl->prev->associated_segment_num + 1;
+        num = tl->prev->associated_segment_num;
     }
     tl->thread_local_obj = NULL;
 
     /* assign numbers consecutively, but that's for tests; we could also
        assign the same number to all of them and they would get their own
        numbers automatically. */
-    num = num % NB_SEGMENTS;
+    num = (num % NB_SEGMENTS) + 1;
     tl->associated_segment_num = num;
     _init_shadow_stack(tl);
     set_gs_register(get_segment_base(num));
diff --git a/c7/stm/sync.c b/c7/stm/sync.c
--- a/c7/stm/sync.c
+++ b/c7/stm/sync.c
@@ -30,7 +30,7 @@
         pthread_mutex_t global_mutex;
         pthread_cond_t cond[_C_TOTAL];
         /* some additional pieces of global state follow */
-        uint8_t in_use[NB_SEGMENTS];   /* 1 if running a pthread */
+        uint8_t in_use1[NB_SEGMENTS];   /* 1 if running a pthread */
         uint64_t global_time;
     };
     char reserved[192];
@@ -124,12 +124,12 @@
 {
     long i;
  restart:
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         if (get_priv_segment(i)->transaction_state == TS_INEVITABLE) {
             if (can_abort) {
                 /* handle this case like a contention: it will either
                    abort us (not the other thread, which is inevitable),
-                   or for a while.  If we go past this call, then we
+                   or wait for a while.  If we go past this call, then we
                    waited; in this case we have to re-check if no other
                    thread is inevitable. */
                 inevitable_contention_management(i);
@@ -152,7 +152,7 @@
     assert(_is_tl_registered(tl));
 
     int num = tl->associated_segment_num;
-    if (sync_ctl.in_use[num] == 0) {
+    if (sync_ctl.in_use1[num - 1] == 0) {
         /* fast-path: we can get the same segment number as the one
            we had before.  The value stored in GS is still valid. */
 #ifdef STM_TESTS
@@ -165,10 +165,10 @@
     }
     /* Look for the next free segment.  If there is none, wait for
        the condition variable. */
-    int i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
-        num = (num + 1) % NB_SEGMENTS;
-        if (sync_ctl.in_use[num] == 0) {
+    int retries;
+    for (retries = 0; retries < NB_SEGMENTS; retries++) {
+        num = (num % NB_SEGMENTS) + 1;
+        if (sync_ctl.in_use1[num - 1] == 0) {
             /* we're getting 'num', a different number. */
             dprintf(("acquired different segment: %d->%d\n", tl->associated_segment_num, num));
             tl->associated_segment_num = num;
@@ -184,7 +184,7 @@
     return false;
 
  got_num:
-    sync_ctl.in_use[num] = 1;
+    sync_ctl.in_use1[num - 1] = 1;
     assert(STM_SEGMENT->segment_num == num);
     assert(STM_SEGMENT->running_thread == NULL);
     STM_SEGMENT->running_thread = tl;
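(Worked example of the 1-based rotation above: with NB_SEGMENTS = 4,
`num = (num % NB_SEGMENTS) + 1` cycles 1 -> 2 -> 3 -> 4 -> 1, whereas the
old `(num + 1) % NB_SEGMENTS` cycled 0..3; the in_use1[] array stays
0-indexed via `num - 1`.)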
@@ -208,8 +208,8 @@
     assert(STM_SEGMENT->running_thread == tl);
     STM_SEGMENT->running_thread = NULL;
 
-    assert(sync_ctl.in_use[tl->associated_segment_num] == 1);
-    sync_ctl.in_use[tl->associated_segment_num] = 0;
+    assert(sync_ctl.in_use1[tl->associated_segment_num - 1] == 1);
+    sync_ctl.in_use1[tl->associated_segment_num - 1] = 0;
 }
 
 __attribute__((unused))
@@ -221,7 +221,7 @@
 bool _stm_in_transaction(stm_thread_local_t *tl)
 {
     int num = tl->associated_segment_num;
-    assert(num < NB_SEGMENTS);
+    assert(1 <= num && num <= NB_SEGMENTS);
     return get_segment(num)->running_thread == tl;
 }
 
@@ -260,12 +260,15 @@
 {
     assert(_safe_points_requested == false);
     assert((_safe_points_requested = true, 1));
+    assert(_has_mutex());
 
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         if (get_segment(i)->nursery_end == NURSERY_END)
             get_segment(i)->nursery_end = NSE_SIGPAUSE;
     }
+    assert(!pause_signalled);
+    pause_signalled = true;
 }
 
 static inline long count_other_threads_sp_running(void)
@@ -276,7 +279,7 @@
     long result = 0;
     int my_num = STM_SEGMENT->segment_num;
 
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         if (i != my_num && get_priv_segment(i)->safe_point == SP_RUNNING) {
             assert(get_segment(i)->nursery_end <= _STM_NSE_SIGNAL_MAX);
             result++;
@@ -287,11 +290,13 @@
 
 static void remove_requests_for_safe_point(void)
 {
+    assert(pause_signalled);
+    pause_signalled = false;
     assert(_safe_points_requested == true);
     assert((_safe_points_requested = false, 1));
 
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         assert(get_segment(i)->nursery_end != NURSERY_END);
         if (get_segment(i)->nursery_end == NSE_SIGPAUSE)
             get_segment(i)->nursery_end = NURSERY_END;
diff --git a/c7/stm/sync.h b/c7/stm/sync.h
--- a/c7/stm/sync.h
+++ b/c7/stm/sync.h
@@ -30,3 +30,5 @@
 
 static void wait_for_end_of_inevitable_transaction(bool can_abort);
 static void synchronize_all_threads(void);
+
+static bool pause_signalled;
diff --git a/c7/stm/weakref.c b/c7/stm/weakref.c
--- a/c7/stm/weakref.c
+++ b/c7/stm/weakref.c
@@ -32,23 +32,18 @@
     ssize_t size = 16;
 
     stm_char *point_to_loc = (stm_char*)WEAKREF_PTR(weakref, size);
-    if (flag_page_private[(uintptr_t)point_to_loc / 4096UL] == PRIVATE_PAGE) {
-        long i;
-        for (i = 0; i < NB_SEGMENTS; i++) {
-            char *base = get_segment_base(i);   /* two different segments */
 
-            object_t ** ref_loc = (object_t **)REAL_ADDRESS(base, point_to_loc);
-            *ref_loc = value;
-        }
-    }
-    else {
-        *WEAKREF_PTR(weakref, size) = value;
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        char *base = get_segment_base(i);
+        object_t ** ref_loc = (object_t **)REAL_ADDRESS(base, point_to_loc);
+        *ref_loc = value;
     }
 }
 
 /***** Minor collection *****/
 
-static void stm_move_young_weakrefs()
+static void stm_move_young_weakrefs(void)
 {
     /* The code relies on the fact that no weakref can be an old object
        weakly pointing to a young object.  Indeed, weakrefs are immutable
@@ -115,7 +110,7 @@
 static void stm_visit_old_weakrefs(void)
 {
     long i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
+    for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(i);
         struct list_s *lst;
 
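
The rewritten weakref helper above now writes the cell into every
segment's copy unconditionally, instead of checking flag_page_private
first.  Assuming REAL_ADDRESS simply offsets an object-relative address
by a segment's base (an assumption -- the macro itself is not shown in
this diff), the pattern generalizes to:

#include <stdint.h>

/* assumed shape of the macro, not taken from this diff */
#define REAL_ADDRESS(seg_base, src)  ((seg_base) + (uintptr_t)(src))

static void write_to_all_segments(char *(*get_base)(long), long nb_segments,
                                  void *obj_rel_loc, void *value)
{
    long i;
    for (i = 1; i <= nb_segments; i++) {
        void **real = (void **)REAL_ADDRESS(get_base(i), obj_rel_loc);
        *real = value;   /* every segment's copy stays consistent */
    }
}
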
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -87,7 +87,7 @@
 #include <stdbool.h>
 bool _stm_was_read(object_t *obj);
 bool _stm_was_written(object_t *obj);
-uint8_t _stm_get_page_flag(uintptr_t index);
+uintptr_t _stm_get_private_page(uintptr_t pagenum);
 bool _stm_in_transaction(stm_thread_local_t *tl);
 char *_stm_get_segment_base(long index);
 void _stm_test_switch(stm_thread_local_t *tl);
@@ -107,6 +107,8 @@
 object_t *_stm_enum_modified_old_objects(long index);
 object_t *_stm_enum_objects_pointing_to_nursery(long index);
 uint64_t _stm_total_allocated(void);
+void _stm_mutex_pages_lock(void);
+void _stm_mutex_pages_unlock(void);
 #endif
 
 #define _STM_GCFLAG_WRITE_BARRIER      0x01
@@ -127,6 +129,14 @@
 
 /* ==================== PUBLIC API ==================== */
 
+/* Number of segments, i.e. the maximum number of transactions that
+   can run in parallel.  If you start transactions in more threads
+   than there are segments, the extra threads block until a segment
+   becomes free.
+*/
+#define STM_NB_SEGMENTS    4
+
+
 /* Structure of objects
    --------------------
 
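
Because threads beyond STM_NB_SEGMENTS block until a segment frees up,
callers will typically size their thread pool to match.  A minimal
sketch assuming plain pthreads (worker and run_pool are placeholders,
not stmgc API):

#include <pthread.h>
#include <stdint.h>

#define STM_NB_SEGMENTS    4       /* matches the definition above */

static void *worker(void *arg)
{
    /* placeholder: register a thread-local and run transactions here */
    return NULL;
}

int run_pool(void)
{
    pthread_t t[STM_NB_SEGMENTS];
    int i;
    for (i = 0; i < STM_NB_SEGMENTS; i++)
        pthread_create(&t[i], NULL, worker, (void *)(uintptr_t)(i + 1));
    for (i = 0; i < STM_NB_SEGMENTS; i++)
        pthread_join(t[i], NULL);
    return 0;
}
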
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -9,6 +9,7 @@
 typedef ... object_t;
 typedef ... stm_jmpbuf_t;
 #define SIZEOF_MYOBJ ...
+#define STM_NB_SEGMENTS ...
 #define _STM_FAST_ALLOC ...
 #define _STM_GCFLAG_WRITE_BARRIER ...
 
@@ -46,7 +47,7 @@
 char *_stm_get_segment_base(long index);
 bool _stm_in_transaction(stm_thread_local_t *tl);
 void _stm_test_switch(stm_thread_local_t *tl);
-uint8_t _stm_get_page_flag(uintptr_t index);
+uintptr_t _stm_get_private_page(uintptr_t pagenum);
 int _stm_get_flags(object_t *obj);
 
 void _stm_start_transaction(stm_thread_local_t *tl, stm_jmpbuf_t *jmpbuf);
@@ -87,6 +88,8 @@
 
 void stm_collect(long level);
 uint64_t _stm_total_allocated(void);
+void _stm_mutex_pages_lock(void);
+void _stm_mutex_pages_unlock(void);
 
 long stm_identityhash(object_t *obj);
 long stm_id(object_t *obj);
@@ -98,8 +101,6 @@
 
 
 GC_N_SMALL_REQUESTS = 36      # from gcpage.c
-SHARED_PAGE         = 1       # from pages.h
-PRIVATE_PAGE        = 3       # from pages.h
 LARGE_MALLOC_OVERHEAD = 16    # from largemalloc.h
 
 lib = ffi.verify('''
@@ -262,6 +263,7 @@
 HDR = lib.SIZEOF_MYOBJ
 assert HDR == 8
 GCFLAG_WRITE_BARRIER = lib._STM_GCFLAG_WRITE_BARRIER
+NB_SEGMENTS = lib.STM_NB_SEGMENTS
 
 
 class Conflict(Exception):
@@ -361,8 +363,8 @@
 def stm_major_collect():
     lib.stm_collect(1)
 
-def stm_get_page_flag(pagenum):
-    return lib._stm_get_page_flag(pagenum)
+def stm_get_private_page(pagenum):
+    return lib._stm_get_private_page(pagenum)
 
 def stm_get_obj_size(o):
     return lib.stmcb_size_rounded_up(stm_get_real_address(o))
@@ -402,10 +404,11 @@
 
 
 class BaseTest(object):
+    NB_THREADS = 2
 
     def setup_method(self, meth):
         lib.stm_setup()
-        self.tls = [_allocate_thread_local(), _allocate_thread_local()]
+        self.tls = [_allocate_thread_local() for i in range(self.NB_THREADS)]
         self.current_thread = 0
 
     def teardown_method(self, meth):
diff --git a/c7/test/test_gcpage.py b/c7/test/test_gcpage.py
--- a/c7/test/test_gcpage.py
+++ b/c7/test/test_gcpage.py
@@ -24,8 +24,8 @@
         new = self.pop_root()
 
         assert len(stm_get_obj_pages(new)) == 2
-        assert ([stm_get_page_flag(p) for p in stm_get_obj_pages(new)]
-                == [SHARED_PAGE]*2)
+        assert ([stm_get_private_page(p) for p in stm_get_obj_pages(new)]
+                == [0, 0])
 
         assert not is_in_nursery(new)
         stm_write(new)
@@ -33,11 +33,11 @@
 
         # now proceed to write into the object in a new transaction
         self.start_transaction()
-        assert ([stm_get_page_flag(p) for p in stm_get_obj_pages(new)]
-                == [SHARED_PAGE]*2)
+        assert ([stm_get_private_page(p) for p in stm_get_obj_pages(new)]
+                == [0, 0])
         stm_write(new)
-        assert ([stm_get_page_flag(p) for p in stm_get_obj_pages(new)]
-                == [PRIVATE_PAGE]*2)
+        assert ([bool(stm_get_private_page(p)) for p in stm_get_obj_pages(new)]
+                == [True, True])
 
         # write to 2nd page of object!!
         wnew = stm_get_real_address(new)
@@ -52,8 +52,8 @@
 
         self.switch(0)
         self.abort_transaction()
-        assert ([stm_get_page_flag(p) for p in stm_get_obj_pages(new)]
-                == [PRIVATE_PAGE]*2)
+        assert ([bool(stm_get_private_page(p)) for p in stm_get_obj_pages(new)]
+                == [True, True])
 
     def test_partial_alloced_pages(self):
         self.start_transaction()
@@ -62,14 +62,14 @@
         stm_minor_collect()
         new = self.pop_root()
 
-        assert stm_get_page_flag(stm_get_obj_pages(new)[0]) == SHARED_PAGE
+        assert stm_get_private_page(stm_get_obj_pages(new)[0]) == 0
         assert stm_get_flags(new) & GCFLAG_WRITE_BARRIER
 
         stm_write(new)
         assert not (stm_get_flags(new) & GCFLAG_WRITE_BARRIER)
 
         self.commit_transaction()
-        assert stm_get_page_flag(stm_get_obj_pages(new)[0]) == SHARED_PAGE
+        assert stm_get_private_page(stm_get_obj_pages(new)[0]) == 0
         assert stm_get_flags(new) & GCFLAG_WRITE_BARRIER
 
         self.start_transaction()
@@ -78,7 +78,7 @@
         stm_minor_collect()
         newer = self.pop_root()
         # 'new' is still in shared_page and committed
-        assert stm_get_page_flag(stm_get_obj_pages(new)[0]) == SHARED_PAGE
+        assert stm_get_private_page(stm_get_obj_pages(new)[0]) == 0
         assert stm_get_flags(new) & GCFLAG_WRITE_BARRIER
         # 'newer' is now part of the SHARED page with 'new', but
         # uncommitted, so no privatization has to take place:
@@ -86,10 +86,10 @@
         assert stm_get_flags(newer) & GCFLAG_WRITE_BARRIER
         stm_write(newer) # does not privatize
         assert not (stm_get_flags(newer) & GCFLAG_WRITE_BARRIER)
-        assert stm_get_page_flag(stm_get_obj_pages(newer)[0]) == SHARED_PAGE
+        assert stm_get_private_page(stm_get_obj_pages(newer)[0]) == 0
         self.commit_transaction()
 
-        assert stm_get_page_flag(stm_get_obj_pages(newer)[0]) == SHARED_PAGE
+        assert stm_get_private_page(stm_get_obj_pages(newer)[0]) == 0
         assert stm_get_flags(newer) & GCFLAG_WRITE_BARRIER
 
     def test_major_collection(self):
@@ -202,7 +202,6 @@
         #
         self.start_transaction()
         stm_major_collect()
-        py.test.skip("XXX implement me")
         assert lib._stm_total_allocated() == 5000 + LMO    # shared again
 
     def test_reshare_if_no_longer_modified_1(self):
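
As the updated assertions suggest, _stm_get_private_page() appears to
return 0 while a page is still shared and some nonzero value once it
has been privatized; the tests only rely on the zero/nonzero
distinction.  Illustrative C helpers in that spirit (the assert_*
names are made up here, not part of the API):

#include <assert.h>
#include <stdint.h>

uintptr_t _stm_get_private_page(uintptr_t pagenum);   /* from stmgc.h */

static void assert_page_shared(uintptr_t pagenum)
{
    assert(_stm_get_private_page(pagenum) == 0);
}

static void assert_page_private(uintptr_t pagenum)
{
    assert(_stm_get_private_page(pagenum) != 0);
}
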
diff --git a/c7/test/test_largemalloc.py b/c7/test/test_largemalloc.py
--- a/c7/test/test_largemalloc.py
+++ b/c7/test/test_largemalloc.py
@@ -14,6 +14,7 @@
 
         lib.memset(self.rawmem, 0xcd, self.size)
         lib._stm_largemalloc_init_arena(self.rawmem, self.size)
+        lib._stm_mutex_pages_lock()   # for this file
 
     def test_simple(self):
         d1 = lib._stm_large_malloc(7000)
diff --git a/c7/test/test_random.py b/c7/test/test_random.py
--- a/c7/test/test_random.py
+++ b/c7/test/test_random.py
@@ -54,7 +54,7 @@
     e.g. maintains read/write sets. The state will be
     discarded on abort or pushed to other threads"""
 
-    def __init__(self, start_time):
+    def __init__(self, start_time, thread_num=None):
         self.read_set = set()
         self.write_set = set()
         self.values = {}
@@ -63,6 +63,7 @@
         self.objs_in_conflict = set()
         self.inevitable = False
         self.created_in_this_transaction = set()
+        self.thread_num = thread_num
 
     def get_old_modified(self):
         # returns only the ones that are modified and not from
@@ -74,6 +75,8 @@
         if objs_in_conflict is not None:
             self.objs_in_conflict |= objs_in_conflict
         self._must_abort = True
+        color = "\033[%dm" % (31 + self.thread_num % 6)
+        print >> sys.stderr, color + "# must abort: %r\033[0m" % (objs_in_conflict,)
 
     def check_must_abort(self):
         return self._must_abort
@@ -180,10 +183,10 @@
                     r, int(ffi.cast("uintptr_t", ex.content[r])),
                     stm_get_obj_size(ex.content[r])))
 
-    def start_transaction(self):
+    def start_transaction(self, thread_num):
         assert self.transaction_state is None
         start_time = self.global_state.inc_and_get_global_time()
-        trs = TransactionState(start_time)
+        trs = TransactionState(start_time, thread_num)
         trs.update_from_committed(
             self.global_state.committed_transaction_state)
         self.transaction_state = trs
@@ -291,6 +294,8 @@
             if confl_set:
                 contention_management(trs, other_trs,
                                       objs_in_conflict=confl_set)
+                if trs.check_must_abort():
+                    break
 
         if trs.check_must_abort():
             self.ex.do('# write-read conflict: %s' %
@@ -305,7 +310,7 @@
 
 
 def op_start_transaction(ex, global_state, thread_state):
-    thread_state.start_transaction()
+    thread_state.start_transaction(ex.thread_num)
     #
     ex.do('self.start_transaction()')
     thread_state.reload_roots(ex)
@@ -533,12 +538,13 @@
 
 
 class TestRandom(BaseTest):
+    NB_THREADS = NB_SEGMENTS
 
     def test_fixed_16_bytes_objects(self, seed=1010):
         rnd = random.Random(seed)
 
         N_OBJECTS = 3
-        N_THREADS = 2
+        N_THREADS = self.NB_THREADS
         ex = Exec(self)
         ex.do("################################################################\n"*10)
         ex.do('# initialization')
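
The abort trace added above picks an ANSI color per thread:
31 + thread_num % 6 cycles through the standard foreground colors 31
(red) to 36 (cyan), and "\033[0m" resets.  The same trick in C, for
comparison (print_abort is only an illustration):

#include <stdio.h>

static void print_abort(int thread_num, const char *what)
{
    /* 31..36 are the standard ANSI foreground colors; \033[0m resets */
    fprintf(stderr, "\033[%dm# must abort: %s\033[0m\n",
            31 + thread_num % 6, what);
}
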
diff --git a/c7/test/test_weakref.py b/c7/test/test_weakref.py
--- a/c7/test/test_weakref.py
+++ b/c7/test/test_weakref.py
@@ -279,7 +279,7 @@
 
         stm_write(lp0) # privatize page with weakref in it too
 
-        assert stm_get_page_flag(stm_get_obj_pages(lp1)[0]) == PRIVATE_PAGE
+        assert stm_get_private_page(stm_get_obj_pages(lp1)[0]) != 0
         assert stm_get_weakref(lp1) == lp0
 
         self.commit_transaction()
diff --git a/duhton/duhton.c b/duhton/duhton.c
--- a/duhton/duhton.c
+++ b/duhton/duhton.c
@@ -7,7 +7,7 @@
     char *filename = NULL;
     int interactive = 1;
 	int i;
-	int num_threads = DEFAULT_NUM_THREADS;
+	int num_threads = STM_NB_SEGMENTS;
 
 	for (i = 1; i < argc; ++i) {
 		if (strcmp(argv[i], "--help") == 0) {
diff --git a/duhton/duhton.h b/duhton/duhton.h
--- a/duhton/duhton.h
+++ b/duhton/duhton.h
@@ -13,8 +13,6 @@
 #endif
 
 
-#define DEFAULT_NUM_THREADS 2
-
 extern __thread stm_thread_local_t stm_thread_local;
 
 struct DuObject_s {

