[pypy-commit] stmgc evaluation: merge c8-efficient-serial-execution-master

Raemi pypy.commits at gmail.com
Mon Mar 26 03:06:11 EDT 2018


Author: Remi Meier <remi.meier at gmail.com>
Branch: evaluation
Changeset: r2158:ce8f8880e0e2
Date: 2018-03-26 09:05 +0200
http://bitbucket.org/pypy/stmgc/changeset/ce8f8880e0e2/

Log:	merge c8-efficient-serial-execution-master

diff --git a/c8/demo/Makefile b/c8/demo/Makefile
--- a/c8/demo/Makefile
+++ b/c8/demo/Makefile
@@ -17,7 +17,7 @@
 H_FILES = ../stmgc.h ../stm/*.h
 C_FILES = ../stmgc.c ../stm/*.c
 
-COMMON = -I.. -pthread -lrt -g -Wall -Werror -DSTM_LARGEMALLOC_TEST 
+COMMON = -I.. -pthread -lrt -lm -g -Wall -Werror -DSTM_LARGEMALLOC_TEST 
 
 CC = gcc-seg-gs
 
diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -381,6 +381,14 @@
 static void readd_wb_executed_flags(void);
 static void check_all_write_barrier_flags(char *segbase, struct list_s *list);
 
+static void signal_commit_to_inevitable_transaction(void) {
+    struct stm_priv_segment_info_s* inevitable_segment = get_inevitable_thread_segment();
+    if (inevitable_segment != 0) {
+        // the inevitable thread is still running: set its "please commit" flag (is ignored by the inevitable thread if it is atomic)
+        inevitable_segment->commit_if_not_atomic = true;
+    }
+}
+
 static void wait_for_inevitable(void)
 {
     intptr_t detached = 0;
@@ -397,6 +405,8 @@
            try to detach an inevitable transaction regularly */
         detached = fetch_detached_transaction();
         if (detached == 0) {
+            // the inevitable trx was not detached or it was detached but is atomic
+            signal_commit_to_inevitable_transaction();
             EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
             if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001))
                 goto wait_some_more;
@@ -1138,11 +1148,10 @@
     }
     _do_start_transaction(tl);
 
-    if (repeat_count == 0) {  /* else, 'nursery_mark' was already set
-                                 in abort_data_structures_from_segment_num() */
-        STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start +
-                                     stm_fill_mark_nursery_bytes);
-    }
+    STM_PSEGMENT->commit_if_not_atomic = false;
+    STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start +
+                                        stm_get_transaction_length(tl));
+
     return repeat_count;
 }
 
@@ -1271,6 +1280,8 @@
     bool was_inev = STM_PSEGMENT->transaction_state == TS_INEVITABLE;
     _validate_and_add_to_commit_log();
 
+
+    stm_thread_local_t *tl_for_trx_len = STM_SEGMENT->running_thread;
     if (external) {
         /* from this point on, unlink the original 'stm_thread_local_t *'
            from its segment.  Better do it as soon as possible, because
@@ -1318,6 +1329,8 @@
 
     s_mutex_unlock();
 
+    stm_transaction_length_handle_validation(tl_for_trx_len, false);
+
     /* between transactions, call finalizers. this will execute
        a transaction itself */
     if (tl != NULL)
@@ -1484,22 +1497,6 @@
     if (pseg->active_queues)
         queues_deactivate_all(pseg, /*at_commit=*/false);
 
-
-    /* Set the next nursery_mark: first compute the value that
-       nursery_mark must have had at the start of the aborted transaction */
-    stm_char *old_mark =pseg->pub.nursery_mark + pseg->total_throw_away_nursery;
-
-    /* This means that the limit, in term of bytes, was: */
-    uintptr_t old_limit = old_mark - (stm_char *)_stm_nursery_start;
-
-    /* If 'total_throw_away_nursery' is smaller than old_limit, use that */
-    if (pseg->total_throw_away_nursery < old_limit)
-        old_limit = pseg->total_throw_away_nursery;
-
-    /* Now set the new limit to 90% of the old limit */
-    pseg->pub.nursery_mark = ((stm_char *)_stm_nursery_start +
-                              (uintptr_t)(old_limit * 0.9));
-
 #ifdef STM_NO_AUTOMATIC_SETJMP
     did_abort = 1;
 #endif
@@ -1534,6 +1531,8 @@
     tl->self_or_0_if_atomic = (intptr_t)tl;   /* clear the 'atomic' flag */
     STM_PSEGMENT->atomic_nesting_levels = 0;
 
+    stm_transaction_length_handle_validation(tl, true);
+
     if (tl->mem_clear_on_abort)
         memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort);
     if (tl->mem_reset_on_abort) {
@@ -1588,7 +1587,7 @@
 
 void _stm_become_inevitable(const char *msg)
 {
-    int num_waits = 0;
+    int num_waits = 1;
 
     timing_become_inevitable();
 
@@ -1599,50 +1598,48 @@
         _stm_collectable_safe_point();
         dprintf(("become_inevitable: %s\n", msg));
 
-        if (any_soon_finished_or_inevitable_thread_segment() &&
-                num_waits <= NB_SEGMENTS) {
+        if (any_soon_finished_or_inevitable_thread_segment()) {
 #if STM_TESTS                           /* for tests: another transaction */
             stm_abort_transaction();    /*   is already inevitable, abort */
 #endif
 
-            bool timed_out = false;
+            signal_commit_to_inevitable_transaction();
 
             s_mutex_lock();
             if (any_soon_finished_or_inevitable_thread_segment() &&
-                    !safe_point_requested()) {
+                    !safe_point_requested() &&
+                    num_waits <= NB_SEGMENTS) {
 
                 /* wait until C_SEGMENT_FREE_OR_SAFE_POINT_REQ is signalled */
                 EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
-                if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ,
-                                       0.000054321))
-                    timed_out = true;
+                if (cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001)) {
+                    num_waits++;
+                }
             }
             s_mutex_unlock();
-
-            if (timed_out) {
-                /* try to detach another inevitable transaction, but
-                   only after waiting a bit.  This is necessary to avoid
-                   deadlocks in some situations, which are hopefully
-                   not too common.  We don't want two threads constantly
-                   detaching each other. */
-                intptr_t detached = fetch_detached_transaction();
-                if (detached != 0) {
-                    EMIT_WAIT_DONE();
-                    commit_fetched_detached_transaction(detached);
-                }
-            }
-            else {
-                num_waits++;
+            /* XXX try to detach another inevitable transaction, but
+              only after waiting a bit.  This is necessary to avoid
+              deadlocks in some situations, which are hopefully
+              not too common.  We don't want two threads constantly
+              detaching each other. */
+            intptr_t detached = fetch_detached_transaction();
+            if (detached != 0) {
+               EMIT_WAIT_DONE();
+               commit_fetched_detached_transaction(detached);
+               EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
             }
             goto retry_from_start;
         }
-        EMIT_WAIT_DONE();
-        if (!_validate_and_turn_inevitable())
-            goto retry_from_start;
+        else {
+            EMIT_WAIT_DONE();
+            if (!_validate_and_turn_inevitable()) {
+                EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
+                goto retry_from_start;
+            }
+        }
     }
-    else {
-        if (!_validate_and_turn_inevitable())
-            return;
+    else if (!_validate_and_turn_inevitable()) {
+        return;
     }
 
     /* There may be a concurrent commit of a detached Tx going on.
@@ -1654,6 +1651,7 @@
         stm_spin_loop();
     assert(_stm_detached_inevitable_from_thread == 0);
 
+    STM_PSEGMENT->commit_if_not_atomic = false;
     soon_finished_or_inevitable_thread_segment();
     STM_PSEGMENT->transaction_state = TS_INEVITABLE;
 
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -168,6 +168,9 @@
 
     /* For stm_enable_atomic() */
     uintptr_t atomic_nesting_levels;
+
+    // TODO signal flag that is checked in throw_away_nursery() for making immediate commit
+    bool commit_if_not_atomic;
 };
 
 enum /* safe_point */ {
diff --git a/c8/stm/detach.c b/c8/stm/detach.c
--- a/c8/stm/detach.c
+++ b/c8/stm/detach.c
@@ -215,6 +215,7 @@
     }
 }
 
+// TODO write tests, verify it is working, verify no overflows with adaptive mode
 uintptr_t stm_is_atomic(stm_thread_local_t *tl)
 {
     assert(STM_SEGMENT->running_thread == tl);
@@ -228,14 +229,18 @@
     return STM_PSEGMENT->atomic_nesting_levels;
 }
 
+// max intptr_t value is 7FFFFFFFFFFFFFFF on 64-bit => larger than 2 * huge value
 #define HUGE_INTPTR_VALUE  0x3000000000000000L
 
 void stm_enable_atomic(stm_thread_local_t *tl)
 {
     if (!stm_is_atomic(tl)) {
+        // do for outermost atomic block only
         tl->self_or_0_if_atomic = 0;
         /* increment 'nursery_mark' by HUGE_INTPTR_VALUE, so that
-           stm_should_break_transaction() returns always false */
+           stm_should_break_transaction() returns always false.
+           preserves the previous nursery_mark, unless it is < 0
+           or >= huge value */
         intptr_t mark = (intptr_t)STM_SEGMENT->nursery_mark;
         if (mark < 0)
             mark = 0;
@@ -255,6 +260,7 @@
     STM_PSEGMENT->atomic_nesting_levels--;
 
     if (STM_PSEGMENT->atomic_nesting_levels == 0) {
+        // revert changes by stm_enable_atomic only if we left the outermost atomic block
         tl->self_or_0_if_atomic = (intptr_t)tl;
         /* decrement 'nursery_mark' by HUGE_INTPTR_VALUE, to cancel
            what was done in stm_enable_atomic() */
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -4,6 +4,8 @@
 #endif
 
 #include "finalizer.h"
+#include <math.h>
+#include <inttypes.h>
 
 /************************************************************/
 
@@ -13,14 +15,77 @@
 
 static uintptr_t _stm_nursery_start;
 
+#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4)
 
-#define DEFAULT_FILL_MARK_NURSERY_BYTES   (NURSERY_SIZE / 4)
+// corresponds to ~4 GB
+#define LARGE_FILL_MARK_NURSERY_BYTES   0x100000000L
 
-uintptr_t stm_fill_mark_nursery_bytes = DEFAULT_FILL_MARK_NURSERY_BYTES;
+// corresponds to ~4 MB nursery fill
+#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001)
+// corresponds to ~400 KB nursery fill
+#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001)
+
+#define BACKOFF_COUNT (20)
+#define BACKOFF_MULTIPLIER (BACKOFF_COUNT / -log10(STM_MIN_RELATIVE_TRANSACTION_LENGTH))
+
+static inline void set_backoff(stm_thread_local_t *tl, double rel_trx_len) {
+    /* the shorter the trx, the more backoff:
+    think a*x + b = backoff, x := -log(rel-trx-len),
+    backoff is <BACKOFF_COUNT> + b at default trx length,
+    linear decrease to b at max trx length */
+    const int b = 5;
+    int new_backoff = (int)((BACKOFF_MULTIPLIER * -log10(rel_trx_len)) + b);
+    tl->transaction_length_backoff = new_backoff;
+    // printf("thread %d, backoff %d\n", tl->thread_local_counter, tl->transaction_length_backoff);
+    tl->linear_transaction_length_increment = rel_trx_len / new_backoff;
+}
+
+static inline double get_new_transaction_length(stm_thread_local_t *tl, bool aborts) {
+    const int multiplier = 2;
+    double previous = tl->relative_transaction_length;
+    double new;
+    if (aborts) {
+        new = previous / multiplier;
+        if (new < STM_MIN_RELATIVE_TRANSACTION_LENGTH) {
+            new = STM_MIN_RELATIVE_TRANSACTION_LENGTH;
+        }
+        set_backoff(tl, new);
+    } else if (tl->transaction_length_backoff == 0) {
+        // backoff counter is zero, exponential increase up to 1
+        new = previous * multiplier;
+        if (new > 1) {
+            new = 1;
+        }
+        if (tl->linear_transaction_length_increment != 0) {
+            // thread had to abort before: slow start
+            set_backoff(tl, new);
+        }
+    } else { // not abort and backoff != 0
+        // in backoff, linear increase up to 1
+        new = previous + tl->linear_transaction_length_increment;
+        if (new > 1) {
+            new = 1;
+        }
+        tl->transaction_length_backoff -= 1;
+    }
+    return new;
+}
+
+static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts) {
+    tl->relative_transaction_length = get_new_transaction_length(tl, aborts);
+}
+
+static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl) {
+    double relative_additional_length = tl->relative_transaction_length;
+    uintptr_t result =
+        (uintptr_t)(LARGE_FILL_MARK_NURSERY_BYTES * relative_additional_length);
+    // printf("%020" PRIxPTR "\n", result);
+    return result;
+}
+
 
 /************************************************************/
 
-
 static void setup_nursery(void)
 {
     assert(_STM_FAST_ALLOC <= NURSERY_SIZE);
@@ -500,6 +565,14 @@
     pseg->pub.nursery_current = (stm_char *)_stm_nursery_start;
     pseg->pub.nursery_mark -= nursery_used;
 
+    assert((pseg->transaction_state == TS_INEVITABLE) || !pseg->commit_if_not_atomic);
+    if (pseg->commit_if_not_atomic
+        && pseg->transaction_state == TS_INEVITABLE
+        && pseg->pub.running_thread->self_or_0_if_atomic != 0) {
+        // transaction is inevitable, not atomic, and commit has been signalled by waiting thread: commit immediately
+        pseg->pub.nursery_mark = 0;
+    }
+
     /* free any object left from 'young_outside_nursery' */
     if (!tree_is_cleared(pseg->young_outside_nursery)) {
         wlog_t *item;
diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h
--- a/c8/stm/nursery.h
+++ b/c8/stm/nursery.h
@@ -56,4 +56,7 @@
 static inline struct object_s *mark_loc(object_t *obj);
 static inline bool _is_from_same_transaction(object_t *obj);
 
+static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts);
+static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl);
+
 #endif
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -277,6 +277,12 @@
        numbers automatically. */
     tl->last_associated_segment_num = num + 1;
     tl->thread_local_counter = ++thread_local_counters;
+
+    /* init adaptive transaction length mode */
+    tl->relative_transaction_length = STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH;
+    tl->transaction_length_backoff = 0;
+    tl->linear_transaction_length_increment = 0;
+
     *_get_cpth(tl) = pthread_self();
     _init_shadow_stack(tl);
     set_gs_register(get_segment_base(num + 1));
diff --git a/c8/stm/sync.c b/c8/stm/sync.c
--- a/c8/stm/sync.c
+++ b/c8/stm/sync.c
@@ -176,6 +176,18 @@
 
 /************************************************************/
 
+#if 0
+static uint8_t number_of_segments_in_use(void) {
+    uint8_t result = 0;
+    int num;
+    for (num = 1; num < NB_SEGMENTS; num++) {
+        if (sync_ctl.in_use1[num] > 0) {
+            result++;
+        }
+    }
+    return result;
+}
+#endif
 
 #if 0
 void stm_wait_for_current_inevitable_transaction(void)
@@ -202,7 +214,6 @@
 }
 #endif
 
-
 static void acquire_thread_segment(stm_thread_local_t *tl)
 {
     /* This function acquires a segment for the currently running thread,
@@ -293,6 +304,19 @@
     return false;
 }
 
+static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void)
+{
+    struct stm_priv_segment_info_s* segment;
+    int num;
+    for (num = 1; num < NB_SEGMENTS; num++) {
+        segment = get_priv_segment(num);
+        if (segment->transaction_state == TS_INEVITABLE) {
+            return segment;
+        }
+    }
+    return 0;
+}
+
 __attribute__((unused))
 static bool _seems_to_be_running_transaction(void)
 {
diff --git a/c8/stm/sync.h b/c8/stm/sync.h
--- a/c8/stm/sync.h
+++ b/c8/stm/sync.h
@@ -29,6 +29,7 @@
 static void release_thread_segment(stm_thread_local_t *tl);
 static void soon_finished_or_inevitable_thread_segment(void);
 static bool any_soon_finished_or_inevitable_thread_segment(void);
+static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void);
 
 enum sync_type_e {
     STOP_OTHERS_UNTIL_MUTEX_UNLOCK,
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -88,6 +88,13 @@
     struct stm_thread_local_s *prev, *next;
     intptr_t self_or_0_if_atomic;
     void *creating_pthread[2];
+    /* == adaptive single thread mode == */
+    /* factor that is multiplied with max transaction length before the start of the next transaction on this thread */
+    double relative_transaction_length;
+    /* when zero, transaction length may increase exponentially, otherwise transaction length may only increase linearly. is (re-)set to some value upon abort and counted down until zero upon successful validation. */
+    int transaction_length_backoff;
+    /* during the backoff, transaction length may increase linearly by this increment on every successful validation */
+    double linear_transaction_length_increment;
 } stm_thread_local_t;
 
 
@@ -202,7 +209,7 @@
 /* ==================== PUBLIC API ==================== */
 
 /* Number of segments (i.e. how many transactions can be executed in
-   parallel, in maximum).  If you try to start transactions in more
+   parallel, at maximum).  If you try to start transactions in more
    threads than the number of segments, it will block, waiting for the
    next segment to become free.
 */
@@ -464,14 +471,6 @@
     return ((intptr_t)STM_SEGMENT->nursery_current >=
             (intptr_t)STM_SEGMENT->nursery_mark);
 }
-extern uintptr_t stm_fill_mark_nursery_bytes;
-/* ^^^ at the start of a transaction, 'nursery_mark' is initialized to
-   'stm_fill_mark_nursery_bytes' inside the nursery.  This value can
-   be larger than the nursery; every minor collection shifts the
-   current 'nursery_mark' down by one nursery-size.  After an abort
-   and restart, 'nursery_mark' is set to ~90% of the value it reached
-   in the last attempt.
-*/
 
 /* "atomic" transaction: a transaction where stm_should_break_transaction()
    always returns false, and where stm_leave_transactional_zone() never
@@ -575,21 +574,49 @@
     STM_GC_MAJOR_START,
     STM_GC_MAJOR_DONE,
 
+    /* execution duration profiling events */
+    STM_WARMUP_COMPLETE,
+
+    STM_DURATION_START_TRX,
+    STM_DURATION_WRITE_GC_ONLY,
+    STM_DURATION_WRITE_SLOWPATH,
+    STM_DURATION_VALIDATION,
+    STM_DURATION_CREATE_CLE,
+    STM_DURATION_COMMIT_EXCEPT_GC,
+    STM_DURATION_MINOR_GC,
+    STM_DURATION_MAJOR_GC_LOG_ONLY,
+    STM_DURATION_MAJOR_GC_FULL,
+
+    STM_SINGLE_THREAD_MODE_ON,
+    STM_SINGLE_THREAD_MODE_OFF,
+    STM_SINGLE_THREAD_MODE_ADAPTIVE,
+
     _STM_EVENT_N
 };
 
-#define STM_EVENT_NAMES                         \
-    "transaction start",                        \
-    "transaction commit",                       \
-    "transaction abort",                        \
-    "contention write read",                    \
-    "wait free segment",                        \
-    "wait other inevitable",                    \
-    "wait done",                                \
-    "gc minor start",                           \
-    "gc minor done",                            \
-    "gc major start",                           \
-    "gc major done"
+#define STM_EVENT_NAMES                             \
+    "transaction start",                            \
+    "transaction commit",                           \
+    "transaction abort",                            \
+    "contention write read",                        \
+    "wait free segment",                            \
+    "wait other inevitable",                        \
+    "wait done",                                    \
+    "gc minor start",                               \
+    "gc minor done",                                \
+    "gc major start",                               \
+    "gc major done",                                \
+    /* names of duration events */                  \
+    "marks completion of benchmark warm up phase",  \
+    "duration of transaction start",                \
+    "duration of gc due to write",                  \
+    "duration of write slowpath",                   \
+    "duration of validation",                       \
+    "duration of commit log entry creation",        \
+    "duration of commit except gc",                 \
+    "duration of minor gc",                         \
+    "duration of major gc doing log clean up only", \
+    "duration of full major gc"
 
 /* The markers pushed in the shadowstack are an odd number followed by a
    regular object pointer. */
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -45,7 +45,6 @@
 } stm_thread_local_t;
 
 char *stm_object_pages;
-uintptr_t stm_fill_mark_nursery_bytes;
 
 void stm_read(object_t *obj);
 /*void stm_write(object_t *obj); use _checked_stm_write() instead */
@@ -671,7 +670,7 @@
      undef_macros=['NDEBUG'],
      include_dirs=[parent_dir],
                  extra_compile_args=['-g', '-O0', '-Werror', '-Wall'], #, '-ferror-limit=5'],
-     extra_link_args=['-g', '-lrt'],
+     extra_link_args=['-g', '-lrt', '-lm'],
      force_generic_engine=True)
 
 
diff --git a/c8/test/test_basic.py b/c8/test/test_basic.py
--- a/c8/test/test_basic.py
+++ b/c8/test/test_basic.py
@@ -736,6 +736,7 @@
         self.check_char_everywhere(lp1, 'X')
 
     def test_stm_should_break_transaction_1(self):
+        py.test.skip("replaced by tcp logic")
         lib.stm_fill_mark_nursery_bytes = 100
         #
         self.start_transaction()
@@ -772,6 +773,7 @@
         self.commit_transaction()
 
     def test_stm_should_break_transaction_2(self):
+        py.test.skip("replaced by tcp logic")
         lib.stm_fill_mark_nursery_bytes = 10000000
         #
         n = 10000000
diff --git a/gcc-seg-gs/README.txt b/gcc-seg-gs/README.txt
--- a/gcc-seg-gs/README.txt
+++ b/gcc-seg-gs/README.txt
@@ -8,9 +8,8 @@
 compile the standard gcc.  Of course, it is likely that gcc 6.1 will
 soon be available from your Linux distribution directly.
 
-Note that with gcc 6.1, you no longer need gcc-5.1.0-patch.diff, and you
-should not need the "-fno-*" options either (but we didn't check that
-yet).
+Note that with gcc 6.1, you no longer need gcc-5.1.0-patch.diff, but you
+still need the "-fno-*" options.
 
 
 


More information about the pypy-commit mailing list