[pypy-commit] stmgc c7-refactor: In-progress: another refactoring simplifying various things, after discovering that we

arigo noreply at buildbot.pypy.org
Sun Feb 23 22:21:14 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: c7-refactor
Changeset: r817:8ee070f3575c
Date: 2014-02-23 22:21 +0100
http://bitbucket.org/pypy/stmgc/changeset/8ee070f3575c/

Log:	In-progress: another refactoring that simplifies various things, after
	discovering that we can, after all, run a minor collection after each
	transaction...  At least, running about 30'000 opcodes in PyPy
	typically consumes 400 KB of nursery, and lowering the nursery size
	from 4 MB down to 400 KB has only a 1.5% performance impact.

diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -12,6 +12,9 @@
 void _stm_write_slowpath(object_t *obj)
 {
     assert(_running_transaction());
+    assert(!_is_in_nursery(obj));
+    abort();//...
+#if 0
 
     /* for old objects from the same transaction, we are done now */
     if (obj_from_same_transaction(obj)) {
@@ -51,9 +54,10 @@
 
     /* claim the write-lock for this object */
  retry:;
-    uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - READMARKER_START;
+    uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
     uint8_t lock_num = STM_PSEGMENT->write_lock_num;
     uint8_t prev_owner;
+    assert((intptr_t)lock_idx >= 0);
     prev_owner = __sync_val_compare_and_swap(&write_locks[lock_idx],
                                              0, lock_num);
 
@@ -73,6 +77,7 @@
     assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER_CALLED));
     obj->stm_flags |= GCFLAG_WRITE_BARRIER_CALLED;
     LIST_APPEND(STM_PSEGMENT->modified_objects, obj);
+#endif
 }
 
 static void reset_transaction_read_version(void)
@@ -137,13 +142,11 @@
         reset_transaction_read_version();
     }
 
-    STM_PSEGMENT->min_read_version_outside_nursery =
-        STM_SEGMENT->transaction_read_version;
+    assert(list_is_empty(STM_PSEGMENT->modified_old_objects));
 
-    assert(list_is_empty(STM_PSEGMENT->modified_objects));
-    assert(list_is_empty(STM_PSEGMENT->creation_markers));
-
-    align_nursery_at_transaction_start();
+#ifdef STM_TESTS
+    check_nursery_at_transaction_start();
+#endif
 }
 
 
@@ -158,8 +161,6 @@
     long remote_num = 1 - STM_SEGMENT->segment_num;
     char *remote_base = get_segment_base(remote_num);
     uint8_t remote_version = get_segment(remote_num)->transaction_read_version;
-    uint8_t remote_min_outside_nursery =
-        get_priv_segment(remote_num)->min_read_version_outside_nursery;
 
     switch (get_priv_segment(remote_num)->transaction_state) {
     case TS_NONE:
@@ -169,11 +170,10 @@
     }
 
     LIST_FOREACH_R(
-        STM_PSEGMENT->modified_objects,
+        STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
         ({
-            if (was_read_remote(remote_base, item, remote_version,
-                                remote_min_outside_nursery)) {
+            if (was_read_remote(remote_base, item, remote_version)) {
                 /* A write-read conflict! */
                 contention_management(remote_num, false);
 
@@ -196,25 +196,24 @@
          get_priv_segment(remote_num)->transaction_state == TS_INEVITABLE);
 
     LIST_FOREACH_R(
-        STM_PSEGMENT->modified_objects,
+        STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
         ({
             if (remote_active) {
                 assert(!was_read_remote(remote_base, item,
-                    get_segment(remote_num)->transaction_read_version,
-                    get_priv_segment(remote_num)->
-                        min_read_version_outside_nursery));
+                    get_segment(remote_num)->transaction_read_version));
             }
 
             /* clear the write-lock (note that this runs with all other
                threads paused, so no need to be careful about ordering) */
-            uintptr_t lock_idx = (((uintptr_t)item) >> 4) - READMARKER_START;
+            uintptr_t lock_idx = (((uintptr_t)item) >> 4) - WRITELOCK_START;
+            assert((intptr_t)lock_idx >= 0);
             assert(write_locks[lock_idx] == STM_PSEGMENT->write_lock_num);
             write_locks[lock_idx] = 0;
 
-            /* remove again the WRITE_BARRIER_CALLED flag */
-            assert(item->stm_flags & GCFLAG_WRITE_BARRIER_CALLED);
-            item->stm_flags &= ~GCFLAG_WRITE_BARRIER_CALLED;
+            /* set again the WRITE_BARRIER flag */
+            assert((item->stm_flags & GCFLAG_WRITE_BARRIER) == 0);
+            item->stm_flags |= GCFLAG_WRITE_BARRIER;
 
             /* copy the modified object to the other segment */
             char *src = REAL_ADDRESS(local_base, item);
@@ -223,7 +222,7 @@
             memcpy(dst, src, size);
         }));
 
-    list_clear(STM_PSEGMENT->modified_objects);
+    list_clear(STM_PSEGMENT->modified_old_objects);
 }
 
 static void _finish_transaction(void)
@@ -232,29 +231,23 @@
     release_thread_segment(tl);
     STM_PSEGMENT->safe_point = SP_NO_TRANSACTION;
     STM_PSEGMENT->transaction_state = TS_NONE;
-    list_clear(STM_PSEGMENT->old_objects_pointing_to_young);
+    if (STM_PSEGMENT->overflow_objects_pointing_to_nursery != NULL) {
+        list_free(STM_PSEGMENT->overflow_objects_pointing_to_nursery);
+        STM_PSEGMENT->overflow_objects_pointing_to_nursery = NULL;
+    }
 }
 
 void stm_commit_transaction(void)
 {
+    minor_collection();
+
     mutex_lock();
 
     assert(STM_PSEGMENT->safe_point = SP_RUNNING);
     STM_PSEGMENT->safe_point = SP_SAFE_POINT_CAN_COLLECT;
 
  restart:
-    switch (STM_PSEGMENT->transaction_state) {
-
-    case TS_REGULAR:
-    case TS_INEVITABLE:
-        break;
-
-    case TS_MUST_ABORT:
-        abort_with_mutex();
-
-    default:
-        assert(!"commit: bad transaction_state");
-    }
+    abort_if_needed();
 
     /* wait until the other thread is at a safe-point */
     if (!try_wait_for_other_safe_points(SP_SAFE_POINT_CANNOT_COLLECT))
@@ -275,10 +268,11 @@
     /* copy modified object versions to other threads */
     push_modified_to_other_segments();
 
-    /* reset the creation markers, and if necessary (i.e. if the page the
-       data is on is not SHARED) copy the data to other threads.  The
-       hope is that it's rarely necessary. */
-    reset_all_creation_markers_and_push_created_data();
+    /* update 'overflow_number' if needed */
+    if (STM_PSEGMENT->overflow_objects_pointing_to_nursery != NULL) {
+        highest_overflow_number += GCFLAG_OVERFLOW_NUMBER_bit0;
+        STM_PSEGMENT->overflow_number = highest_overflow_number;
+    }
 
     /* done */
     _finish_transaction();
@@ -297,6 +291,8 @@
 
 static void reset_modified_from_other_segments(void)
 {
+    abort();//...
+#if 0
     /* pull the right versions from other threads in order
        to reset our pages as part of an abort */
     long remote_num = 1 - STM_SEGMENT->segment_num;
@@ -304,7 +300,7 @@
     char *remote_base = get_segment_base(remote_num);
 
     LIST_FOREACH_R(
-        STM_PSEGMENT->modified_objects,
+        STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
         ({
             /* all objects in 'modified_objects' have this flag */
@@ -331,12 +327,14 @@
             write_fence();
 
             /* clear the write-lock */
-            uintptr_t lock_idx = (((uintptr_t)item) >> 4) - READMARKER_START;
+            uintptr_t lock_idx = (((uintptr_t)item) >> 4) - WRITELOCK_START;
+            assert((intptr_t)lock_idx >= 0);
             assert(write_locks[lock_idx]);
             write_locks[lock_idx] = 0;
         }));
 
     list_clear(STM_PSEGMENT->modified_objects);
+#endif
 }
 
 static void abort_with_mutex(void)
@@ -356,8 +354,6 @@
     /* reset all the modified objects (incl. re-adding GCFLAG_WRITE_BARRIER) */
     reset_modified_from_other_segments();
 
-    reset_all_creation_markers();
-
     stm_jmpbuf_t *jmpbuf_ptr = STM_SEGMENT->jmpbuf_ptr;
     stm_thread_local_t *tl = STM_SEGMENT->running_thread;
     tl->shadowstack = STM_PSEGMENT->shadowstack_at_start_of_transaction;
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -11,8 +11,8 @@
 
 #define NB_PAGES            (1500*256)    // 1500MB
 #define NB_SEGMENTS         2
+#define NB_SEGMENTS_MAX     240    /* don't increase NB_SEGMENTS past this */
 #define MAP_PAGES_FLAGS     (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE)
-#define LARGE_OBJECT_WORDS  36
 #define NB_NURSERY_PAGES    1024          // 4MB
 
 #define TOTAL_MEMORY          (NB_PAGES * 4096UL * NB_SEGMENTS)
@@ -25,23 +25,31 @@
 #define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
 #define NB_READMARKER_PAGES   (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
 
-#define CREATMARKER_START     ((FIRST_OBJECT_PAGE * 4096UL) >> 8)
-#define FIRST_CREATMARKER_PAGE (CREATMARKER_START / 4096UL)
+#define WRITELOCK_START       ((END_NURSERY_PAGE * 4096UL) >> 4)
+#define WRITELOCK_END         READMARKER_END
 
 
-enum {
-    /* this flag is not set on most objects.  when stm_write() is called
-       on an object that is not from the current transaction, then
-       _stm_write_slowpath() is called, and then the flag is set to
-       say "called once already, no need to call again". */
-    GCFLAG_WRITE_BARRIER_CALLED = _STM_GCFLAG_WRITE_BARRIER_CALLED,
-    /* allocated by gcpage.c in uniformly-sized pages of small objects */
+enum /* stm_flags */ {
+    /* This flag is set on non-nursery objects.  It forces stm_write()
+       to call _stm_write_slowpath().
+    */
+    GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
+
+    /* This flag is set by gcpage.c for all objects living in
+       uniformly-sized pages of small objects.
+    */
     GCFLAG_SMALL_UNIFORM = 0x02,
+
+    /* All remaining bits of the 32-bit 'stm_flags' field are taken by
+       the "overflow number".  This is a number that identifies the
+       "overflow objects" from the current transaction among all old
+       objects.  More precisely, overflow objects are objects from the
+       current transaction that have been flushed out of the nursery,
+       which occurs if the same transaction allocates too many objects.
+    */
+    GCFLAG_OVERFLOW_NUMBER_bit0 = 0x04   /* must be last */
 };
 
-#define CROSS_PAGE_BOUNDARY(start, stop)                                \
-    (((uintptr_t)(start)) / 4096UL != ((uintptr_t)(stop)) / 4096UL)
-
 
 /************************************************************/
 
@@ -52,44 +60,61 @@
 
 struct stm_priv_segment_info_s {
     struct stm_segment_info_s pub;
-    struct list_s *old_objects_pointing_to_young;
-    struct list_s *modified_objects;
-    struct list_s *creation_markers;
+
+    /* List of overflowed objects (from the same transaction but outside
+       the nursery) on which the write-barrier was triggered, so that
+       they likely contain a pointer to a nursery object */
+    struct list_s *overflow_objects_pointing_to_nursery;
+
+    /* List of old objects (older than the current transaction) that the
+       current transaction attempts to modify */
+    struct list_s *modified_old_objects;
+
+    /* Start time: to know approximately for how long a transaction has
+       been running, in contention management */
     uint64_t start_time;
+
+    /* This is the number stored in the overflowed objects (a multiple of
+       GCFLAG_OVERFLOW_NUMBER_bit0).  It is incremented when the
+       transaction is done, but only if we actually overflowed any
+       object; otherwise, no object has got this number. */
+    uint32_t overflow_number;
+
+    /* The marker stored in the global 'write_locks' array to mean
+       "this segment has modified this old object". */
     uint8_t write_lock_num;
-    uint8_t safe_point;         /* one of the SP_xxx constants */
-    uint8_t transaction_state;  /* one of the TS_xxx constants */
-    uint8_t min_read_version_outside_nursery;   /* see was_read_remote() */
-    uintptr_t real_nursery_section_end;
+
+    /* The thread's safe-point state, one of the SP_xxx constants */
+    uint8_t safe_point;
+
+    /* The transaction status, one of the TS_xxx constants */
+    uint8_t transaction_state;
+
+    /* In case of abort, we restore the 'shadowstack' field. */
     object_t **shadowstack_at_start_of_transaction;
 };
 
-enum {
+enum /* safe_point */ {
     SP_NO_TRANSACTION=0,
     SP_RUNNING,
     SP_SAFE_POINT_CANNOT_COLLECT,
     SP_SAFE_POINT_CAN_COLLECT,
 };
-enum {
+enum /* transaction_state */ {
     TS_NONE=0,
     TS_REGULAR,
     TS_INEVITABLE,
     TS_MUST_ABORT,
 };
-enum {   /* for stm_creation_marker_t */
-    CM_NOT_CURRENT_TRANSACTION             = 0x00,
-    CM_CURRENT_TRANSACTION_OUTSIDE_NURSERY = 0x01,
-    CM_CURRENT_TRANSACTION_IN_NURSERY      = 0xff,
-};
 
 static char *stm_object_pages;
-static stm_thread_local_t *stm_thread_locals = NULL;
+static stm_thread_local_t *stm_all_thread_locals = NULL;
 
 #ifdef STM_TESTS
 static char *stm_other_pages;
 #endif
 
-static uint8_t write_locks[READMARKER_END - READMARKER_START];
+static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START];
 
 
 #define REAL_ADDRESS(segment_base, src)   ((segment_base) + (uintptr_t)(src))
@@ -113,14 +138,18 @@
 static bool _is_tl_registered(stm_thread_local_t *tl);
 static bool _running_transaction(void);
 
-static inline bool obj_from_same_transaction(object_t *obj) {
-    return ((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm !=
-        CM_NOT_CURRENT_TRANSACTION;
-}
-
 static void teardown_core(void);
 static void abort_with_mutex(void) __attribute__((noreturn));
 
+static inline bool was_read_remote(char *base, object_t *obj,
+                                   uint8_t other_transaction_read_version)
+{
+    uint8_t rm = ((struct stm_read_marker_s *)
+                  (base + (((uintptr_t)obj) >> 4)))->rm;
+    assert(rm <= other_transaction_read_version);
+    return rm == other_transaction_read_version;
+}
+
 static inline void _duck(void) {
     /* put a call to _duck() between two instructions that set 0 into
        a %gs-prefixed address and that may otherwise be replaced with
@@ -129,3 +158,17 @@
        llvmfix/no-memset-creation-with-addrspace.diff. */
     asm("/* workaround for llvm bug */");
 }
+
+static inline void abort_if_needed(void) {
+    switch (STM_PSEGMENT->transaction_state) {
+    case TS_REGULAR:
+    case TS_INEVITABLE:
+        break;
+
+    case TS_MUST_ABORT:
+        abort_with_mutex();
+
+    default:
+        assert(!"commit: bad transaction_state");
+    }
+}
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -19,8 +19,7 @@
 
 static void teardown_gcpage(void)
 {
-    memset(small_alloc_shared, 0, sizeof(small_alloc_shared));
-    memset(small_alloc_privtz, 0, sizeof(small_alloc_privtz));
+    memset(small_alloc, 0, sizeof(small_alloc));
     free_uniform_pages = NULL;
 }
 
@@ -56,11 +55,10 @@
     return;
 
  out_of_memory:
-    stm_fatalerror("out of memory!\n");
+    stm_fatalerror("out of memory!\n");   /* XXX */
 }
 
-static char *_allocate_small_slowpath(
-        struct small_alloc_s small_alloc[], uint64_t size)
+static char *_allocate_small_slowpath(uint64_t size)
 {
     /* not thread-safe!  Use only when holding the mutex */
     assert(_has_mutex());
@@ -72,8 +70,10 @@
 }
 
 
+#if 0
 static char *allocate_outside_nursery_large(uint64_t size)
 {
+    abort(); //XXX review
     /* not thread-safe!  Use only when holding the mutex */
     assert(_has_mutex());
 
@@ -92,11 +92,9 @@
         setup_N_pages(uninitialized_page_start, npages);
         uninitialized_page_start += npages * 4096UL;
     }
-
-    assert(get_single_creation_marker((stm_char *)(addr - stm_object_pages))
-           == 0);
     return addr;
 }
+#endif
 
 object_t *_stm_allocate_old(ssize_t size_rounded_up)
 {
diff --git a/c7/stm/gcpage.h b/c7/stm/gcpage.h
--- a/c7/stm/gcpage.h
+++ b/c7/stm/gcpage.h
@@ -22,35 +22,17 @@
                             contiguous range of unallocated objs */
 };
 
-/* For each small request size, we have three independent chained lists
-   of address ranges:
-
-   - 'small_alloc_shared': ranges are within pages that are likely to be
-     shared.  We don't know for sure, because pages can be privatized
-     by normal run of stm_write().
-
-   - 'small_alloc_sh_old': moved from 'small_alloc_shared' when we're
-     looking for a range with the creation_marker set; this collects
-     the unsuitable ranges, i.e. the ones with already at least one
-     object and no creation marker.
-
-   - 'small_alloc_privtz': ranges are within pages that are privatized.
-*/
-static struct small_alloc_s small_alloc_shared[GC_N_SMALL_REQUESTS];
-static struct small_alloc_s small_alloc_sh_old[GC_N_SMALL_REQUESTS];
-static struct small_alloc_s small_alloc_privtz[GC_N_SMALL_REQUESTS];
+static struct small_alloc_s small_alloc[GC_N_SMALL_REQUESTS];
 static char *free_uniform_pages;
 
 static void setup_gcpage(void);
 static void teardown_gcpage(void);
-static char *allocate_outside_nursery_large(uint64_t size);
+//static char *allocate_outside_nursery_large(uint64_t size);
 
 
-static char *_allocate_small_slowpath(
-        struct small_alloc_s small_alloc[], uint64_t size);
+static char *_allocate_small_slowpath(uint64_t size);
 
-static inline char *allocate_outside_nursery_small(
-        struct small_alloc_s small_alloc[], uint64_t size)
+static inline char *allocate_outside_nursery_small(uint64_t size)
 {
     uint64_t index = size / 8;
     OPT_ASSERT(2 <= index);
@@ -58,7 +40,7 @@
 
     char *result = small_alloc[index].next_object;
     if (result == NULL)
-        return _allocate_small_slowpath(small_alloc, size);
+        return _allocate_small_slowpath(size);
 
     char *following;
     if (small_alloc[index].range_last == result) {
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -38,35 +38,28 @@
 bool _stm_was_read(object_t *obj)
 {
     return was_read_remote(STM_SEGMENT->segment_base, obj,
-                           STM_SEGMENT->transaction_read_version,
-                           STM_PSEGMENT->min_read_version_outside_nursery);
+                           STM_SEGMENT->transaction_read_version);
 }
 
 bool _stm_was_written(object_t *obj)
 {
-    return !!((((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm |
-               obj->stm_flags) & _STM_GCFLAG_WRITE_BARRIER_CALLED);
-}
-
-uint8_t _stm_creation_marker(object_t *obj)
-{
-    return ((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm;
+    return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0;
 }
 
 #ifdef STM_TESTS
-object_t *_stm_enum_old_objects_pointing_to_young(void)
+object_t *_stm_enum_overflow_objects_pointing_to_nursery(void)
 {
     static long index = 0;
-    struct list_s *lst = STM_PSEGMENT->old_objects_pointing_to_young;
+    struct list_s *lst = STM_PSEGMENT->overflow_objects_pointing_to_nursery;
     if (index < list_count(lst))
         return (object_t *)list_item(lst, index++);
     index = 0;
     return (object_t *)-1;
 }
-object_t *_stm_enum_modified_objects(void)
+object_t *_stm_enum_modified_old_objects(void)
 {
     static long index = 0;
-    struct list_s *lst = STM_PSEGMENT->modified_objects;
+    struct list_s *lst = STM_PSEGMENT->modified_old_objects;
     if (index < list_count(lst))
         return (object_t *)list_item(lst, index++);
     index = 0;
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -4,61 +4,38 @@
 
 /************************************************************/
 
+/* xxx later: divide the nursery into sections, and zero them
+   incrementally.  For now we avoid the mess of maintaining a
+   description of which parts of the nursery are already zeroed
+   and which ones are not (caused by the fact that each
+   transaction fills up a different amount).
+*/
+
 #define NURSERY_START         (FIRST_NURSERY_PAGE * 4096UL)
 #define NURSERY_SIZE          (NB_NURSERY_PAGES * 4096UL)
+#define NURSERY_END           (NURSERY_START + NURSERY_SIZE)
 
-/* an object larger than LARGE_OBJECT will never be allocated in
-   the nursery. */
-#define LARGE_OBJECT          (65*1024)
-
-/* the nursery is divided in "sections" this big.  Each section is
-   allocated to a single running thread. */
-#define NURSERY_SECTION_SIZE  (128*1024)
-
-/* if objects are larger than this limit but smaller than LARGE_OBJECT,
-   then they might be allocted outside sections but still in the nursery. */
-#define MEDIUM_OBJECT         (6*1024)
-
-/* size in bytes of the "line".  Should be equal to the line used by
-   stm_creation_marker_t. */
-#define NURSERY_LINE          256
-
-/************************************************************/
-
-
-static union {
-    struct {
-        uint64_t used;    /* number of bytes from the nursery used so far */
-        uint64_t initial_value_of_used;
-    };
-    char reserved[64];
-} nursery_ctl __attribute__((aligned(64)));
-
-static struct list_s *old_objects_pointing_to_young;
+static uintptr_t _stm_nursery_start;
+uintptr_t _stm_nursery_end;
 
 
 /************************************************************/
 
 static void setup_nursery(void)
 {
-    assert(NURSERY_LINE == (1 << 8));  /* from stm_creation_marker_t */
-    assert((NURSERY_SECTION_SIZE % NURSERY_LINE) == 0);
-    assert(MEDIUM_OBJECT < LARGE_OBJECT);
-    assert(LARGE_OBJECT < NURSERY_SECTION_SIZE);
-    nursery_ctl.used = 0;
-    old_objects_pointing_to_young = list_create();
+    assert(_STM_FAST_ALLOC <= NURSERY_SIZE);
+    _stm_nursery_start = NURSERY_START;
+    _stm_nursery_end   = NURSERY_END;
 }
 
 static void teardown_nursery(void)
 {
-    list_free(old_objects_pointing_to_young);
-    nursery_ctl.initial_value_of_used = 0;
 }
 
 static inline bool _is_in_nursery(object_t *obj)
 {
     assert((uintptr_t)obj >= NURSERY_START);
-    return (uintptr_t)obj < NURSERY_START + NURSERY_SIZE;
+    return (uintptr_t)obj < NURSERY_END;
 }
 
 bool _stm_in_nursery(object_t *obj)
@@ -66,29 +43,12 @@
     return _is_in_nursery(obj);
 }
 
+#if 0
 static bool _is_young(object_t *obj)
 {
     return _is_in_nursery(obj);    /* for now */
 }
-
-static inline bool was_read_remote(char *base, object_t *obj,
-                                   uint8_t other_transaction_read_version,
-                                   uint8_t min_read_version_outside_nursery)
-{
-    uint8_t rm = ((struct stm_read_marker_s *)
-                  (base + (((uintptr_t)obj) >> 4)))->rm;
-
-    assert(min_read_version_outside_nursery <=
-           other_transaction_read_version);
-    assert(rm <= other_transaction_read_version);
-
-    if (_is_in_nursery(obj)) {
-        return rm == other_transaction_read_version;
-    }
-    else {
-        return rm >= min_read_version_outside_nursery;
-    }
-}
+#endif
 
 
 /************************************************************/
@@ -110,8 +70,10 @@
     }
 }
 
+#if 0
 static void minor_trace_if_young(object_t **pobj)
 {
+    abort(); //...
     /* takes a normal pointer to a thread-local pointer to an object */
     object_t *obj = *pobj;
     if (obj == NULL)
@@ -145,7 +107,7 @@
        which has a granularity of 256 bytes.
     */
     size_t size = stmcb_size_rounded_up((struct object_s *)realobj);
-    uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - READMARKER_START;
+    uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
     uint8_t write_lock = write_locks[lock_idx];
     object_t *nobj;
     long i;
@@ -183,7 +145,7 @@
             uintptr_t lastpage= (dataofs + size - 1) / 4096UL;
             pages_privatize(pagenum, lastpage - pagenum + 1, false);
 
-            lock_idx = (dataofs >> 4) - READMARKER_START;
+            lock_idx = (dataofs >> 4) - WRITELOCK_START;
             assert(write_locks[lock_idx] == 0);
             write_locks[lock_idx] = write_lock;
 
@@ -238,7 +200,7 @@
 
 static void collect_roots_in_nursery(void)
 {
-    stm_thread_local_t *tl = stm_thread_locals;
+    stm_thread_local_t *tl = stm_all_thread_locals;
     do {
         object_t **current = tl->shadowstack;
         object_t **base = tl->shadowstack_base;
@@ -246,7 +208,7 @@
             minor_trace_if_young(current);
         }
         tl = tl->next;
-    } while (tl != stm_thread_locals);
+    } while (tl != stm_all_thread_locals);
 }
 
 static void trace_and_drag_out_of_nursery(object_t *obj)
@@ -256,7 +218,7 @@
         struct object_s *realobj =
             (struct object_s *)REAL_ADDRESS(get_segment_base(i), obj);
 
-        realobj->stm_flags &= ~GCFLAG_WRITE_BARRIER_CALLED;
+        realobj->stm_flags |= GCFLAG_WRITE_BARRIER;
 
         stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
 
@@ -278,8 +240,9 @@
            don't, it's because the same object was stored in several
            segment's old_objects_pointing_to_young.  It's fine to
            ignore duplicates. */
-        if ((obj->stm_flags & GCFLAG_WRITE_BARRIER_CALLED) == 0)
-            continue;
+        abort();//...
+        //if ((obj->stm_flags & GCFLAG_WRITE_BARRIER_CALLED) == 0)
+        //    continue;
 
         /* The flag GCFLAG_WRITE_BARRIER_CALLED is going to be removed:
            no live object should have this flag set after a nursery
@@ -294,13 +257,10 @@
 
 static void reset_nursery(void)
 {
+    abort();//...
     /* reset the global amount-of-nursery-used-so-far */
     nursery_ctl.used = nursery_ctl.initial_value_of_used;
 
-    /* reset the write locks */
-    memset(write_locks + ((NURSERY_START >> 4) - READMARKER_START),
-           0, NURSERY_SIZE >> 4);
-
     long i;
     for (i = 0; i < NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *other_pseg = get_priv_segment(i);
@@ -321,9 +281,10 @@
         }
         else if (other_pseg->pub.transaction_read_version < 0xff) {
             other_pseg->pub.transaction_read_version++;
-            assert(0 < other_pseg->min_read_version_outside_nursery &&
+            abort();//...
+            /*assert(0 < other_pseg->min_read_version_outside_nursery &&
                    other_pseg->min_read_version_outside_nursery
-                     < other_pseg->pub.transaction_read_version);
+                   < other_pseg->pub.transaction_read_version);*/
         }
         else {
             /* however, when the value 0xff is reached, we are stuck
@@ -338,7 +299,7 @@
         if (old_end > NURSERY_START) {
             char *creation_markers = REAL_ADDRESS(other_pseg->pub.segment_base,
                                                   NURSERY_START >> 8);
-            assert(old_end <= NURSERY_START + NURSERY_SIZE);
+            assert(old_end <= NURSERY_END);
             memset(creation_markers, 0, (old_end - NURSERY_START) >> 8);
         }
         else {
@@ -346,14 +307,17 @@
         }
     }
 }
+#endif
 
-static void do_minor_collection(void)
+static void minor_collection(void)
 {
-    /* all other threads are paused in safe points during the whole
-       minor collection */
+    assert(!_has_mutex());
+    abort_if_needed();
+
     dprintf(("minor_collection\n"));
-    assert(_has_mutex());
-    assert(list_is_empty(old_objects_pointing_to_young));
+
+    abort();//...
+#if 0
 
     /* List of what we need to do and invariants we need to preserve
        -------------------------------------------------------------
@@ -390,142 +354,66 @@
     reset_nursery();
 
     pages_make_shared_again(FIRST_NURSERY_PAGE, NB_NURSERY_PAGES);
+#endif
 }
 
 
-static void restore_nursery_section_end(uintptr_t prev_value)
-{
-    __sync_bool_compare_and_swap(&STM_SEGMENT->v_nursery_section_end,
-                                 prev_value,
-                                 STM_PSEGMENT->real_nursery_section_end);
-}
-
-static void stm_minor_collection(uint64_t request_size)
-{
-    /* Run a minor collection --- but only if we can't get 'request_size'
-       bytes out of the nursery; if we can, no-op. */
-    mutex_lock();
-
-    assert(STM_PSEGMENT->safe_point == SP_RUNNING);
-    STM_PSEGMENT->safe_point = SP_SAFE_POINT_CAN_COLLECT;
-
- restart:
-    /* We just waited here, either from mutex_lock() or from cond_wait(),
-       so we should check again if another thread did the minor
-       collection itself */
-    if (request_size <= NURSERY_SIZE - nursery_ctl.used)
-        goto exit;
-
-    if (!try_wait_for_other_safe_points(SP_SAFE_POINT_CAN_COLLECT))
-        goto restart;
-
-    /* now we can run our minor collection */
-    do_minor_collection();
-
- exit:
-    STM_PSEGMENT->safe_point = SP_RUNNING;
-
-    mutex_unlock();
-}
-
 void stm_collect(long level)
 {
     assert(level == 0);
-    stm_minor_collection(-1);
+    minor_collection();
 }
 
 
 /************************************************************/
 
-#define NURSERY_ALIGN(bytes)  \
-    (((bytes) + NURSERY_LINE - 1) & ~(NURSERY_LINE - 1))
 
-static stm_char *allocate_from_nursery(uint64_t bytes)
-{
-    /* may collect! */
-    /* thread-safe; allocate a chunk of memory from the nursery */
-    bytes = NURSERY_ALIGN(bytes);
-    while (1) {
-        uint64_t p = __sync_fetch_and_add(&nursery_ctl.used, bytes);
-        if (LIKELY(p + bytes <= NURSERY_SIZE)) {
-            return (stm_char *)(NURSERY_START + p);
-        }
-
-        /* nursery full! */
-        stm_minor_collection(bytes);
-    }
-}
-
-
-stm_char *_stm_allocate_slowpath(ssize_t size_rounded_up)
+object_t *_stm_allocate_slowpath(ssize_t size_rounded_up)
 {
     /* may collect! */
     STM_SEGMENT->nursery_current -= size_rounded_up;  /* restore correct val */
 
-    if (_stm_collectable_safe_point())
-        return (stm_char *)stm_allocate(size_rounded_up);
+ restart:
+    stm_safe_point();
 
-    if (size_rounded_up < MEDIUM_OBJECT) {
-        /* This is a small object.  The current section is really full.
-           Allocate the next section and initialize it with zeroes. */
-        stm_char *p = allocate_from_nursery(NURSERY_SECTION_SIZE);
-        STM_SEGMENT->nursery_current = p + size_rounded_up;
+    OPT_ASSERT(size_rounded_up >= 16);
+    OPT_ASSERT((size_rounded_up & 7) == 0);
+    OPT_ASSERT(size_rounded_up < _STM_FAST_ALLOC);
 
-        /* Set v_nursery_section_end, but carefully: another thread may
-           have forced it to be equal to NSE_SIGNAL. */
-        uintptr_t end = (uintptr_t)p + NURSERY_SECTION_SIZE;
-        uintptr_t prev_end = STM_PSEGMENT->real_nursery_section_end;
-        STM_PSEGMENT->real_nursery_section_end = end;
-        restore_nursery_section_end(prev_end);
-
-        memset(REAL_ADDRESS(STM_SEGMENT->segment_base, p), 0,
-               NURSERY_SECTION_SIZE);
-
-        /* Also fill the corresponding creation markers with 0xff. */
-        set_creation_markers(p, NURSERY_SECTION_SIZE,
-                             CM_CURRENT_TRANSACTION_IN_NURSERY);
-        return p;
+    stm_char *p = STM_SEGMENT->nursery_current;
+    stm_char *end = p + size_rounded_up;
+    if ((uintptr_t)end <= NURSERY_END) {
+        STM_SEGMENT->nursery_current = end;
+        return (object_t *)p;
     }
 
-    if (size_rounded_up < LARGE_OBJECT) {
-        /* A medium-sized object that doesn't fit into the current
-           nursery section.  Note that if by chance it does fit, then
-           _stm_allocate_slowpath() is not even called.  This case here
-           is to prevent too much of the nursery to remain not used
-           just because we tried to allocate a medium-sized object:
-           doing so doesn't end the current section. */
-        stm_char *p = allocate_from_nursery(size_rounded_up);
-        memset(REAL_ADDRESS(STM_SEGMENT->segment_base, p), 0,
-               size_rounded_up);
-        set_single_creation_marker(p, CM_CURRENT_TRANSACTION_IN_NURSERY);
-        return p;
-    }
-
-    abort();
+    minor_collection();
+    goto restart;
 }
 
-static void align_nursery_at_transaction_start(void)
+object_t *_stm_allocate_external(ssize_t size_rounded_up)
 {
-    /* When the transaction starts, we must align the 'nursery_current'
-       and set creation markers for the part of the section the follows.
-    */
-    uintptr_t c = (uintptr_t)STM_SEGMENT->nursery_current;
-    c = NURSERY_ALIGN(c);
-    STM_SEGMENT->nursery_current = (stm_char *)c;
-
-    uint64_t size = STM_PSEGMENT->real_nursery_section_end - c;
-    if (size > 0) {
-        set_creation_markers((stm_char *)c, size,
-                             CM_CURRENT_TRANSACTION_IN_NURSERY);
-    }
+    abort();//...
 }
 
 #ifdef STM_TESTS
 void _stm_set_nursery_free_count(uint64_t free_count)
 {
-    assert(free_count == NURSERY_ALIGN(free_count));
-    assert(nursery_ctl.used <= NURSERY_SIZE - free_count);
-    nursery_ctl.used = NURSERY_SIZE - free_count;
-    nursery_ctl.initial_value_of_used = nursery_ctl.used;
+    assert(free_count <= NURSERY_SIZE);
+    _stm_nursery_start = NURSERY_END - free_count;
+
+    long i;
+    for (i = 0; i < NB_SEGMENTS; i++) {
+        if ((uintptr_t)get_segment(i)->nursery_current < _stm_nursery_start)
+            get_segment(i)->nursery_current = (stm_char *)_stm_nursery_start;
+    }
 }
 #endif
+
+static void check_nursery_at_transaction_start(void)
+{
+    assert((uintptr_t)STM_SEGMENT->nursery_current == _stm_nursery_start);
+    uintptr_t i;
+    for (i = 0; i < _stm_nursery_end - _stm_nursery_start; i++)
+        assert(STM_SEGMENT->nursery_current[i] == 0);
+}
diff --git a/c7/stm/nursery.h b/c7/stm/nursery.h
--- a/c7/stm/nursery.h
+++ b/c7/stm/nursery.h
@@ -1,40 +1,8 @@
 
-/* special values of 'v_nursery_section_end' */
-#define NSE_SIGNAL        1
-#define NSE_SIGNAL_DONE   2
+/* '_stm_nursery_end' is either NURSERY_END or NSE_SIGNAL */
+#define NSE_SIGNAL     _STM_NSE_SIGNAL
 
-#if _STM_NSE_SIGNAL != NSE_SIGNAL
-# error "adapt _STM_NSE_SIGNAL"
-#endif
 
-/* Rules for 'v_nursery_section_end':
+static uint32_t highest_overflow_number;
 
-   - Its main purpose is to be read by the owning thread in stm_allocate().
-
-   - The owning thread can change its value without acquiring the mutex,
-     but it must do so carefully, with a compare_and_swap.
-
-   - If a different thread has the mutex, it can force the field to the
-     value NSE_SIGNAL or NSE_SIGNAL_DONE with a regular write.  This should
-     not be hidden by the compare_and_swap done by the owning thread:
-     even if it occurs just before or just after a compare_and_swap,
-     the end result is that the special value NSE_SIGNAL(_DONE) is still
-     in the field.
-
-   - When the owning thread sees NSE_SIGNAL, it must signal and wait until
-     the other thread restores the value to NSE_SIGNAL_DONE.  When the
-     owning thread sees NSE_SIGNAL_DONE, it can replace it, again with
-     compare_and_swap, with the real value.
-
-   - This should in theory be a volatile field, because it can be read
-     from stm_allocate() while at the same time being changed to the value
-     NSE_SIGNAL by another thread.  In practice, making it volatile has
-     probably just a small negative impact on performance for no good reason.
-*/
-
-static void align_nursery_at_transaction_start(void);
-static void restore_nursery_section_end(uintptr_t prev_value);
-
-static inline bool was_read_remote(char *base, object_t *obj,
-                                   uint8_t other_transaction_read_version,
-                                   uint8_t min_read_version_outside_nursery);
+static void check_nursery_at_transaction_start(void) __attribute__((unused));
diff --git a/c7/stm/pagecopy.c b/c7/stm/pagecopy.c
--- a/c7/stm/pagecopy.c
+++ b/c7/stm/pagecopy.c
@@ -28,11 +28,13 @@
     }
 }
 
+#if 0
 static void pagecopy_256(void *dest, const void *src)
 {
     PAGECOPY_128(dest,       src      );
     PAGECOPY_128(dest + 128, src + 128);
 }
+#endif
 
 #if 0   /* XXX enable if detected on the cpu */
 static void pagecopy_ymm8(void *dest, const void *src)
diff --git a/c7/stm/pagecopy.h b/c7/stm/pagecopy.h
--- a/c7/stm/pagecopy.h
+++ b/c7/stm/pagecopy.h
@@ -1,3 +1,2 @@
 
 static void pagecopy(void *dest, const void *src);      // 4096 bytes
-static void pagecopy_256(void *dest, const void *src);  // 256 bytes
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -23,6 +23,7 @@
         flag_page_private[pagenum + i] = SHARED_PAGE;
 }
 
+#if 0
 static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count)
 {
     /* Same as pages_initialize_shared(), but tries hard to minimize the
@@ -42,6 +43,7 @@
         pages_initialize_shared(pagenum + start, i - start);
     }
 }
+#endif
 
 static void privatize_range_and_unlock(uintptr_t pagenum, uintptr_t count,
                                        bool full)
@@ -128,102 +130,7 @@
     }
 }
 
-static void set_creation_markers(stm_char *p, uint64_t size, int newvalue)
-{
-    /* Set the creation markers to 'newvalue' for all lines from 'p' to
-       'p+size'.  Both p and size should be aligned to the line size: 256. */
-
-    assert((((uintptr_t)p) & 255) == 0);
-    assert((size & 255) == 0);
-    assert(size > 0);
-
-    uintptr_t cmaddr = ((uintptr_t)p) >> 8;
-    LIST_APPEND(STM_PSEGMENT->creation_markers, cmaddr);
-
-    char *addr = REAL_ADDRESS(STM_SEGMENT->segment_base, cmaddr);
-    memset(addr, newvalue, size >> 8);
-}
-
-static uint8_t get_single_creation_marker(stm_char *p)
-{
-    uintptr_t cmaddr = ((uintptr_t)p) >> 8;
-    return ((stm_creation_marker_t *)cmaddr)->cm;
-}
-
-static void set_single_creation_marker(stm_char *p, int newvalue)
-{
-    uintptr_t cmaddr = ((uintptr_t)p) >> 8;
-    ((stm_creation_marker_t *)cmaddr)->cm = newvalue;
-    LIST_APPEND(STM_PSEGMENT->creation_markers, cmaddr);
-}
-
-static void reset_all_creation_markers(void)
-{
-    /* Note that the page 'NB_PAGES - 1' is not actually used.  This
-       ensures that the creation markers always end with some zeroes.
-       We reset the markers 8 at a time, by writing null integers
-       until we reach a place that is already null.
-    */
-    LIST_FOREACH_R(
-        STM_PSEGMENT->creation_markers,
-        uintptr_t /*item*/,
-        ({
-            TLPREFIX uint64_t *p = (TLPREFIX uint64_t *)(item & ~7);
-            while (*p != 0)
-                *p++ = 0;
-        }));
-
-    list_clear(STM_PSEGMENT->creation_markers);
-}
-
-static void reset_all_creation_markers_and_push_created_data(void)
-{
-    /* This is like reset_all_creation_markers(), but additionally
-       it looks for markers in non-SHARED pages, and pushes the
-       corresponding data (in 256-bytes blocks) to other threads.
-    */
-#if NB_SEGMENTS != 2
-# error "The logic in this function only works with two segments"
-#endif
-
-    char *local_base = STM_SEGMENT->segment_base;
-    long remote_num = 1 - STM_SEGMENT->segment_num;
-    char *remote_base = get_segment_base(remote_num);
-
-    /* this logic assumes that creation markers are in 256-bytes blocks,
-       and pages are 4096 bytes, so creation markers are handled by groups
-       of 16 --- which is two 8-bytes uint64_t. */
-
-    LIST_FOREACH_R(
-        STM_PSEGMENT->creation_markers,
-        uintptr_t /*item*/,
-        ({
-            TLPREFIX uint64_t *p = (TLPREFIX uint64_t *)(item & ~15);
-            while (p[0] != 0 || p[1] != 0) {
-
-                uint64_t pagenum = ((uint64_t)p) >> 4;
-                if (flag_page_private[pagenum] != SHARED_PAGE) {
-                    /* copying needed */
-                    uint64_t dataofs = ((uint64_t)p) << 8;
-                    stm_char *start = (stm_char *)p;
-                    stm_char *stop = start + 16;
-                    while (start < stop) {
-                        if (*start++ != 0) {
-                            pagecopy_256(remote_base + dataofs,
-                                         local_base + dataofs);
-                        }
-                        dataofs += 256;
-                    }
-                }
-                p[0] = 0; _duck();
-                p[1] = 0;
-                p += 2;
-            }
-        }));
-
-    list_clear(STM_PSEGMENT->creation_markers);
-}
-
+#if 0
 static bool is_in_shared_pages(object_t *obj)
 {
     uintptr_t first_page = ((uintptr_t)obj) / 4096UL;
@@ -234,11 +141,11 @@
     ssize_t obj_size = stmcb_size_rounded_up(
         (struct object_s *)REAL_ADDRESS(stm_object_pages, obj));
 
-    uintptr_t end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL;
-    /* that's the page *following* the last page with the object */
+    uintptr_t last_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
 
-    while (first_page < end_page)
+    while (first_page <= last_page)
         if (flag_page_private[first_page++] != SHARED_PAGE)
             return false;
     return true;
 }
+#endif
diff --git a/c7/stm/pages.h b/c7/stm/pages.h
--- a/c7/stm/pages.h
+++ b/c7/stm/pages.h
@@ -1,5 +1,5 @@
 
-enum {
+enum /* flag_page_private */ {
     /* The page is not in use.  Assume that each segment sees its own copy. */
     FREE_PAGE=0,
 
@@ -12,15 +12,13 @@
 
     /* Page is private for each segment. */
     PRIVATE_PAGE,
-
-};      /* used for flag_page_private */
-
+};
 
 static uint8_t flag_page_private[NB_PAGES];
 
 static void _pages_privatize(uintptr_t pagenum, uintptr_t count, bool full);
 static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
-static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count);
+//static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count);
 
 inline static void pages_privatize(uintptr_t pagenum, uintptr_t count,
                                    bool full) {
@@ -32,9 +30,4 @@
     _pages_privatize(pagenum, count, full);
 }
 
-static void set_creation_markers(stm_char *p, uint64_t size, int newvalue);
-static uint8_t get_single_creation_marker(stm_char *p) __attribute__((unused));
-static void set_single_creation_marker(stm_char *p, int newvalue);
-static void reset_all_creation_markers(void);
-static void reset_all_creation_markers_and_push_created_data(void);
-static bool is_in_shared_pages(object_t *obj);
+//static bool is_in_shared_pages(object_t *obj);
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -5,14 +5,8 @@
 
 void stm_setup(void)
 {
-#if 0
-    _stm_reset_shared_lock();
-    _stm_reset_pages();
-
-    inevitable_lock = 0;
-#endif
-
     /* Check that some values are acceptable */
+    assert(NB_SEGMENTS <= NB_SEGMENTS_MAX);
     assert(4096 <= ((uintptr_t)STM_SEGMENT));
     assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT);
     assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192);
@@ -21,9 +15,6 @@
     assert(READMARKER_START < READMARKER_END);
     assert(READMARKER_END <= 4096UL * FIRST_OBJECT_PAGE);
     assert(FIRST_OBJECT_PAGE < NB_PAGES);
-    assert(CREATMARKER_START >= 8192);
-    assert(2 <= FIRST_CREATMARKER_PAGE);
-    assert(FIRST_CREATMARKER_PAGE <= FIRST_READMARKER_PAGE);
     assert((NB_PAGES * 4096UL) >> 8 <= (FIRST_OBJECT_PAGE * 4096UL) >> 4);
     assert((END_NURSERY_PAGE * 4096UL) >> 8 <=
            (FIRST_READMARKER_PAGE * 4096UL));
@@ -53,10 +44,10 @@
         memset(REAL_ADDRESS(segment_base, STM_PSEGMENT), 0,
                sizeof(*STM_PSEGMENT));
 
-        /* Pages in range(2, FIRST_CREATMARKER_PAGE) are never used */
-        if (FIRST_CREATMARKER_PAGE > 2)
+        /* Pages in range(2, FIRST_READMARKER_PAGE) are never used */
+        if (FIRST_READMARKER_PAGE > 2)
             mprotect(segment_base + 8192,
-                     (FIRST_CREATMARKER_PAGE - 2) * 4096UL,
+                     (FIRST_READMARKER_PAGE - 2) * 4096UL,
                      PROT_NONE);
 
         struct stm_priv_segment_info_s *pr = get_priv_segment(i);
@@ -64,17 +55,16 @@
         pr->write_lock_num = i + 1;
         pr->pub.segment_num = i;
         pr->pub.segment_base = segment_base;
-        pr->old_objects_pointing_to_young = list_create();
-        pr->modified_objects = list_create();
-        pr->creation_markers = list_create();
+        pr->overflow_objects_pointing_to_nursery = NULL;
+        pr->modified_old_objects = list_create();
+        pr->overflow_number = GCFLAG_OVERFLOW_NUMBER_bit0 * (i + 1);
+        highest_overflow_number = pr->overflow_number;
     }
 
-    /* Make the nursery pages shared.  The other pages are
-       shared lazily, as remap_file_pages() takes a relatively
-       long time for each page. */
-    pages_initialize_shared(FIRST_NURSERY_PAGE, NB_NURSERY_PAGES);
+    /* The pages are shared lazily, as remap_file_pages() takes a relatively
+       long time for each page.
 
-    /* The read markers are initially zero, which is correct:
+       The read markers are initially zero, which is correct:
        STM_SEGMENT->transaction_read_version never contains zero,
        so a null read marker means "not read" whatever the
        current transaction_read_version is.
@@ -96,9 +86,8 @@
     long i;
     for (i = 0; i < NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pr = get_priv_segment(i);
-        list_free(pr->old_objects_pointing_to_young);
-        list_free(pr->modified_objects);
-        list_free(pr->creation_markers);
+        assert(pr->overflow_objects_pointing_to_nursery == NULL);
+        list_free(pr->modified_old_objects);
     }
 
     munmap(stm_object_pages, TOTAL_MEMORY);
@@ -115,15 +104,15 @@
 void stm_register_thread_local(stm_thread_local_t *tl)
 {
     int num;
-    if (stm_thread_locals == NULL) {
-        stm_thread_locals = tl->next = tl->prev = tl;
+    if (stm_all_thread_locals == NULL) {
+        stm_all_thread_locals = tl->next = tl->prev = tl;
         num = 0;
     }
     else {
-        tl->next = stm_thread_locals;
-        tl->prev = stm_thread_locals->prev;
-        stm_thread_locals->prev->next = tl;
-        stm_thread_locals->prev = tl;
+        tl->next = stm_all_thread_locals;
+        tl->prev = stm_all_thread_locals->prev;
+        stm_all_thread_locals->prev->next = tl;
+        stm_all_thread_locals->prev = tl;
         num = tl->prev->associated_segment_num + 1;
     }
 
@@ -137,10 +126,11 @@
 
 void stm_unregister_thread_local(stm_thread_local_t *tl)
 {
-    if (tl == stm_thread_locals) {
-        stm_thread_locals = stm_thread_locals->next;
-        if (tl == stm_thread_locals) {
-            stm_thread_locals = NULL;
+    assert(tl->next != NULL);
+    if (tl == stm_all_thread_locals) {
+        stm_all_thread_locals = stm_all_thread_locals->next;
+        if (tl == stm_all_thread_locals) {
+            stm_all_thread_locals = NULL;
             return;
         }
     }
diff --git a/c7/stm/sync.c b/c7/stm/sync.c
--- a/c7/stm/sync.c
+++ b/c7/stm/sync.c
@@ -191,7 +191,6 @@
     assert(STM_PSEGMENT->safe_point == SP_SAFE_POINT_CAN_COLLECT);
     STM_PSEGMENT->safe_point = SP_RUNNING;
 
-    restore_nursery_section_end(NSE_SIGNAL_DONE);
     if (STM_PSEGMENT->transaction_state == TS_MUST_ABORT)
         stm_abort_transaction();
 }
@@ -219,6 +218,8 @@
        try_wait_for_other_safe_points() while another is currently blocked
        in the cond_wait() in this same function.
     */
+    abort();//...
+#if 0
     assert(_has_mutex());
     assert(STM_PSEGMENT->safe_point == SP_SAFE_POINT_CAN_COLLECT);
 
@@ -265,36 +266,28 @@
     cond_broadcast();   /* to wake up the other threads, but later,
                            when they get the mutex again */
     return true;
+#endif
 }
 
-bool _stm_collectable_safe_point(void)
+void _stm_collectable_safe_point(void)
 {
-    bool any_operation = false;
- restart:;
-    switch (STM_SEGMENT->v_nursery_section_end) {
+    /* If _stm_nursery_end was set to NSE_SIGNAL by another thread,
+       we end up here as soon as we try to call stm_allocate() or do
+       a call to stm_safe_point().
+       See try_wait_for_other_safe_points() for details.
+    */
+    mutex_lock();
+    assert(STM_PSEGMENT->safe_point == SP_RUNNING);
 
-    case NSE_SIGNAL:
-        /* If nursery_section_end was set to NSE_SIGNAL by another thread,
-           we end up here as soon as we try to call stm_allocate().
-           See try_wait_for_other_safe_points() for details. */
-        mutex_lock();
-        assert(STM_PSEGMENT->safe_point == SP_RUNNING);
+    if (_stm_nursery_end == NSE_SIGNAL) {
         STM_PSEGMENT->safe_point = SP_SAFE_POINT_CAN_COLLECT;
+
         cond_broadcast();
-        cond_wait();
+
+        do { cond_wait(); } while (_stm_nursery_end == NSE_SIGNAL);
+
         STM_PSEGMENT->safe_point = SP_RUNNING;
-        mutex_unlock();
+    }
 
-        /* Once the sync point is done, retry. */
-        any_operation = true;
-        goto restart;
-
-    case NSE_SIGNAL_DONE:
-        restore_nursery_section_end(NSE_SIGNAL_DONE);
-        any_operation = true;
-        break;
-
-    default:;
-    }
-    return any_operation;
+    mutex_unlock();
 }
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -12,21 +12,12 @@
 #include <stdbool.h>
 #include <assert.h>
 #include <limits.h>
-#include <endian.h>
 #include <unistd.h>
 
 #if LONG_MAX == 2147483647
 # error "Requires a 64-bit environment"
 #endif
 
-#if BYTE_ORDER == 1234
-# define LENDIAN  1    // little endian
-#elif BYTE_ORDER == 4321
-# define LENDIAN  0    // big endian
-#else
-# error "Unsupported endianness"
-#endif
-
 
 #define TLPREFIX __attribute__((address_space(256)))
 
@@ -42,27 +33,16 @@
        We assume that objects are at least 16 bytes long, and use
        their address divided by 16.  The read marker is equal to
        'STM_SEGMENT->transaction_read_version' if and only if the
-       object was read in the current transaction. */
+       object was read in the current transaction.  The nurseries
+       also have corresponding read markers, but they are never used. */
     uint8_t rm;
 };
 
-struct stm_creation_marker_s {
-    /* In addition to read markers, every "line" of 256 bytes has one
-       extra byte, the creation marker, located at the address divided
-       by 256.  The creation marker is either non-zero if all objects in
-       this line come have been allocated by the current transaction,
-       or 0x00 if none of them have been.  Lines cannot contain a
-       mixture of both.  Non-zero values are 0xff if in the nursery,
-       and 0x01 if outside the nursery. */
-    uint8_t cm;
-};
-
 struct stm_segment_info_s {
     uint8_t transaction_read_version;
     int segment_num;
     char *segment_base;
     stm_char *nursery_current;
-    uintptr_t v_nursery_section_end;  /* see nursery.h */
     struct stm_thread_local_s *running_thread;
     stm_jmpbuf_t *jmpbuf_ptr;
 };
@@ -79,10 +59,13 @@
 /* this should use llvm's coldcc calling convention,
    but it's not exposed to C code so far */
 void _stm_write_slowpath(object_t *);
-stm_char *_stm_allocate_slowpath(ssize_t);
+object_t *_stm_allocate_slowpath(ssize_t);
+object_t *_stm_allocate_external(ssize_t);
 void _stm_become_inevitable(char*);
 void _stm_start_transaction(stm_thread_local_t *, stm_jmpbuf_t *);
-bool _stm_collectable_safe_point(void);
+void _stm_collectable_safe_point(void);
+
+extern uintptr_t _stm_nursery_end;
 
 #ifdef STM_TESTS
 bool _stm_was_read(object_t *obj);
@@ -98,12 +81,13 @@
 void _stm_start_safe_point(void);
 void _stm_stop_safe_point(void);
 void _stm_set_nursery_free_count(uint64_t free_count);
-object_t *_stm_enum_old_objects_pointing_to_young(void);
-object_t *_stm_enum_modified_objects(void);
+object_t *_stm_enum_overflow_objects_pointing_to_nursery(void);
+object_t *_stm_enum_modified_old_objects(void);
 #endif
 
-#define _STM_GCFLAG_WRITE_BARRIER_CALLED  0x80
-#define _STM_NSE_SIGNAL                   1
+#define _STM_GCFLAG_WRITE_BARRIER      0x01
+#define _STM_NSE_SIGNAL                   0
+#define _STM_FAST_ALLOC           (66*1024)
 #define STM_FLAGS_PREBUILT                0
 
 
@@ -133,7 +117,7 @@
 */
 
 struct object_s {
-    uint8_t stm_flags;            /* reserved for the STM library */
+    uint32_t stm_flags;            /* reserved for the STM library */
 };
 
 /* The read barrier must be called whenever the object 'obj' is read.
@@ -142,33 +126,24 @@
    transaction commit, nothing that can potentially collect or do a safe
    point (like stm_write() on a different object).  Also, if we might
    have finished the transaction and started the next one, then
-   stm_read() needs to be called again.
+   stm_read() needs to be called again.  It can be omitted if
+   stm_write() is called, or immediately after getting the object from
+   stm_allocate(), as long as the rules above are respected.
 */
 static inline void stm_read(object_t *obj)
 {
-#if 0    /* very costly check */
-    assert(((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm
-           <= STM_SEGMENT->transaction_read_version);
-#endif
     ((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm =
         STM_SEGMENT->transaction_read_version;
 }
 
 /* The write barrier must be called *before* doing any change to the
    object 'obj'.  If we might have finished the transaction and started
-   the next one, then stm_write() needs to be called again.
-   If stm_write() is called, it is not necessary to also call stm_read()
-   on the same object.
+   the next one, then stm_write() needs to be called again.  It is not
+   necessary to call it immediately after stm_allocate().
 */
 static inline void stm_write(object_t *obj)
 {
-    /* this is:
-           'if (cm < 0x80 && (stm_flags & WRITE_BARRIER_CALLED) == 0)'
-         where 'cm' can be 0 (not created in current transaction)
-                     or 0xff (created in current transaction)
-                     or 0x01 (same, but outside the nursery) */
-    if (UNLIKELY(!((((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm |
-                    obj->stm_flags) & _STM_GCFLAG_WRITE_BARRIER_CALLED)))
+    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0))
         _stm_write_slowpath(obj);
 }
 
@@ -190,11 +165,15 @@
     OPT_ASSERT(size_rounded_up >= 16);
     OPT_ASSERT((size_rounded_up & 7) == 0);
 
+    if (UNLIKELY(size_rounded_up >= _STM_FAST_ALLOC))
+        return _stm_allocate_external(size_rounded_up);
+
     stm_char *p = STM_SEGMENT->nursery_current;
     stm_char *end = p + size_rounded_up;
     STM_SEGMENT->nursery_current = end;
-    if (UNLIKELY((uintptr_t)end > STM_SEGMENT->v_nursery_section_end))
-        p = _stm_allocate_slowpath(size_rounded_up);
+    if (UNLIKELY((uintptr_t)end > _stm_nursery_end))
+        return _stm_allocate_slowpath(size_rounded_up);
+
     return (object_t *)p;
 }
 
@@ -250,7 +229,7 @@
 /* Forces a safe-point if needed.  Normally not needed: this is
    automatic if you call stm_allocate(). */
 static inline void stm_safe_point(void) {
-    if (STM_SEGMENT->v_nursery_section_end == _STM_NSE_SIGNAL)
+    if (_stm_nursery_end == _STM_NSE_SIGNAL)
         _stm_collectable_safe_point();
 }
 
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -53,7 +53,6 @@
 bool _checked_stm_write(object_t *obj);
 bool _stm_was_read(object_t *obj);
 bool _stm_was_written(object_t *obj);
-uint8_t _stm_creation_marker(object_t *obj);
 bool _stm_in_nursery(object_t *obj);
 char *_stm_real_address(object_t *obj);
 object_t *_stm_segment_address(char *ptr);
@@ -77,8 +76,8 @@
 
 ssize_t stmcb_size_rounded_up(struct object_s *obj);
 
-object_t *_stm_enum_old_objects_pointing_to_young(void);
-object_t *_stm_enum_modified_objects(void);
+object_t *_stm_enum_overflow_objects_pointing_to_nursery(void);
+object_t *_stm_enum_modified_old_objects(void);
 
 void stm_collect(long level);
 """)
@@ -248,7 +247,7 @@
                     ('STM_DEBUGPRINT', '1')],
      undef_macros=['NDEBUG'],
      include_dirs=[parent_dir],
-     extra_compile_args=['-g', '-O0', '-Werror'],
+     extra_compile_args=['-g', '-O0', '-Werror', '-ferror-limit=1'],
      force_generic_engine=True)
 
 


More information about the pypy-commit mailing list