[pypy-commit] stmgc evaluation: merge c8-efficient-serial-execution-master
Raemi
pypy.commits at gmail.com
Mon Mar 26 03:06:11 EDT 2018
Author: Remi Meier <remi.meier at gmail.com>
Branch: evaluation
Changeset: r2158:ce8f8880e0e2
Date: 2018-03-26 09:05 +0200
http://bitbucket.org/pypy/stmgc/changeset/ce8f8880e0e2/
Log: merge c8-efficient-serial-execution-master
diff --git a/c8/demo/Makefile b/c8/demo/Makefile
--- a/c8/demo/Makefile
+++ b/c8/demo/Makefile
@@ -17,7 +17,7 @@
H_FILES = ../stmgc.h ../stm/*.h
C_FILES = ../stmgc.c ../stm/*.c
-COMMON = -I.. -pthread -lrt -g -Wall -Werror -DSTM_LARGEMALLOC_TEST
+COMMON = -I.. -pthread -lrt -lm -g -Wall -Werror -DSTM_LARGEMALLOC_TEST
CC = gcc-seg-gs
diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -381,6 +381,14 @@
static void readd_wb_executed_flags(void);
static void check_all_write_barrier_flags(char *segbase, struct list_s *list);
+static void signal_commit_to_inevitable_transaction(void) {
+ struct stm_priv_segment_info_s* inevitable_segment = get_inevitable_thread_segment();
+ if (inevitable_segment != 0) {
+ // the inevitable thread is still running: set its "please commit" flag (is ignored by the inevitable thread if it is atomic)
+ inevitable_segment->commit_if_not_atomic = true;
+ }
+}
+
static void wait_for_inevitable(void)
{
intptr_t detached = 0;
@@ -397,6 +405,8 @@
try to detach an inevitable transaction regularly */
detached = fetch_detached_transaction();
if (detached == 0) {
+ // the inevitable trx was not detached or it was detached but is atomic
+ signal_commit_to_inevitable_transaction();
EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001))
goto wait_some_more;
@@ -1138,11 +1148,10 @@
}
_do_start_transaction(tl);
- if (repeat_count == 0) { /* else, 'nursery_mark' was already set
- in abort_data_structures_from_segment_num() */
- STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start +
- stm_fill_mark_nursery_bytes);
- }
+ STM_PSEGMENT->commit_if_not_atomic = false;
+ STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start +
+ stm_get_transaction_length(tl));
+
return repeat_count;
}
@@ -1271,6 +1280,8 @@
bool was_inev = STM_PSEGMENT->transaction_state == TS_INEVITABLE;
_validate_and_add_to_commit_log();
+
+ stm_thread_local_t *tl_for_trx_len = STM_SEGMENT->running_thread;
if (external) {
/* from this point on, unlink the original 'stm_thread_local_t *'
from its segment. Better do it as soon as possible, because
@@ -1318,6 +1329,8 @@
s_mutex_unlock();
+ stm_transaction_length_handle_validation(tl_for_trx_len, false);
+
/* between transactions, call finalizers. this will execute
a transaction itself */
if (tl != NULL)
@@ -1484,22 +1497,6 @@
if (pseg->active_queues)
queues_deactivate_all(pseg, /*at_commit=*/false);
-
- /* Set the next nursery_mark: first compute the value that
- nursery_mark must have had at the start of the aborted transaction */
- stm_char *old_mark =pseg->pub.nursery_mark + pseg->total_throw_away_nursery;
-
- /* This means that the limit, in term of bytes, was: */
- uintptr_t old_limit = old_mark - (stm_char *)_stm_nursery_start;
-
- /* If 'total_throw_away_nursery' is smaller than old_limit, use that */
- if (pseg->total_throw_away_nursery < old_limit)
- old_limit = pseg->total_throw_away_nursery;
-
- /* Now set the new limit to 90% of the old limit */
- pseg->pub.nursery_mark = ((stm_char *)_stm_nursery_start +
- (uintptr_t)(old_limit * 0.9));
-
#ifdef STM_NO_AUTOMATIC_SETJMP
did_abort = 1;
#endif
@@ -1534,6 +1531,8 @@
tl->self_or_0_if_atomic = (intptr_t)tl; /* clear the 'atomic' flag */
STM_PSEGMENT->atomic_nesting_levels = 0;
+ stm_transaction_length_handle_validation(tl, true);
+
if (tl->mem_clear_on_abort)
memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort);
if (tl->mem_reset_on_abort) {
@@ -1588,7 +1587,7 @@
void _stm_become_inevitable(const char *msg)
{
- int num_waits = 0;
+ int num_waits = 1;
timing_become_inevitable();
@@ -1599,50 +1598,48 @@
_stm_collectable_safe_point();
dprintf(("become_inevitable: %s\n", msg));
- if (any_soon_finished_or_inevitable_thread_segment() &&
- num_waits <= NB_SEGMENTS) {
+ if (any_soon_finished_or_inevitable_thread_segment()) {
#if STM_TESTS /* for tests: another transaction */
stm_abort_transaction(); /* is already inevitable, abort */
#endif
- bool timed_out = false;
+ signal_commit_to_inevitable_transaction();
s_mutex_lock();
if (any_soon_finished_or_inevitable_thread_segment() &&
- !safe_point_requested()) {
+ !safe_point_requested() &&
+ num_waits <= NB_SEGMENTS) {
/* wait until C_SEGMENT_FREE_OR_SAFE_POINT_REQ is signalled */
EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
- if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ,
- 0.000054321))
- timed_out = true;
+ if (cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001)) {
+ num_waits++;
+ }
}
s_mutex_unlock();
-
- if (timed_out) {
- /* try to detach another inevitable transaction, but
- only after waiting a bit. This is necessary to avoid
- deadlocks in some situations, which are hopefully
- not too common. We don't want two threads constantly
- detaching each other. */
- intptr_t detached = fetch_detached_transaction();
- if (detached != 0) {
- EMIT_WAIT_DONE();
- commit_fetched_detached_transaction(detached);
- }
- }
- else {
- num_waits++;
+ /* XXX try to detach another inevitable transaction, but
+ only after waiting a bit. This is necessary to avoid
+ deadlocks in some situations, which are hopefully
+ not too common. We don't want two threads constantly
+ detaching each other. */
+ intptr_t detached = fetch_detached_transaction();
+ if (detached != 0) {
+ EMIT_WAIT_DONE();
+ commit_fetched_detached_transaction(detached);
+ EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
}
goto retry_from_start;
}
- EMIT_WAIT_DONE();
- if (!_validate_and_turn_inevitable())
- goto retry_from_start;
+ else {
+ EMIT_WAIT_DONE();
+ if (!_validate_and_turn_inevitable()) {
+ EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE);
+ goto retry_from_start;
+ }
+ }
}
- else {
- if (!_validate_and_turn_inevitable())
- return;
+ else if (!_validate_and_turn_inevitable()) {
+ return;
}
/* There may be a concurrent commit of a detached Tx going on.
@@ -1654,6 +1651,7 @@
stm_spin_loop();
assert(_stm_detached_inevitable_from_thread == 0);
+ STM_PSEGMENT->commit_if_not_atomic = false;
soon_finished_or_inevitable_thread_segment();
STM_PSEGMENT->transaction_state = TS_INEVITABLE;
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -168,6 +168,9 @@
/* For stm_enable_atomic() */
uintptr_t atomic_nesting_levels;
+
+ // TODO signal flag that is checked in throw_away_nursery() for making immediate commit
+ bool commit_if_not_atomic;
};
enum /* safe_point */ {
diff --git a/c8/stm/detach.c b/c8/stm/detach.c
--- a/c8/stm/detach.c
+++ b/c8/stm/detach.c
@@ -215,6 +215,7 @@
}
}
+// TODO write tests, verify is working, verify no overflows with adaptive mode
uintptr_t stm_is_atomic(stm_thread_local_t *tl)
{
assert(STM_SEGMENT->running_thread == tl);
@@ -228,14 +229,18 @@
return STM_PSEGMENT->atomic_nesting_levels;
}
+// max intptr_t value is 7FFFFFFFFFFFFFFF on 64-bit => larger than 2 * huge value
#define HUGE_INTPTR_VALUE 0x3000000000000000L
void stm_enable_atomic(stm_thread_local_t *tl)
{
if (!stm_is_atomic(tl)) {
+ // do for outermost atomic block only
tl->self_or_0_if_atomic = 0;
/* increment 'nursery_mark' by HUGE_INTPTR_VALUE, so that
- stm_should_break_transaction() returns always false */
+ stm_should_break_transaction() returns always false.
+ preserves the previous nursery_mark, unless it is < 0
+ or >= huge value */
intptr_t mark = (intptr_t)STM_SEGMENT->nursery_mark;
if (mark < 0)
mark = 0;
@@ -255,6 +260,7 @@
STM_PSEGMENT->atomic_nesting_levels--;
if (STM_PSEGMENT->atomic_nesting_levels == 0) {
+ // revert changes by stm_enable_atomic only if we left the outermost atomic block
tl->self_or_0_if_atomic = (intptr_t)tl;
/* decrement 'nursery_mark' by HUGE_INTPTR_VALUE, to cancel
what was done in stm_enable_atomic() */
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -4,6 +4,8 @@
#endif
#include "finalizer.h"
+#include <math.h>
+#include <inttypes.h>
/************************************************************/
@@ -13,14 +15,77 @@
static uintptr_t _stm_nursery_start;
+#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4)
-#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4)
+// corresponds to ~4 GB
+#define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L
-uintptr_t stm_fill_mark_nursery_bytes = DEFAULT_FILL_MARK_NURSERY_BYTES;
+// corresponds to ~4 MB nursery fill
+#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001)
+// corresponds to ~400 KB nursery fill
+#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001)
+
+#define BACKOFF_COUNT (20)
+#define BACKOFF_MULTIPLIER (BACKOFF_COUNT / -log10(STM_MIN_RELATIVE_TRANSACTION_LENGTH))
+
+static inline void set_backoff(stm_thread_local_t *tl, double rel_trx_len) {
+ /* the shorter the trx, the more backoff:
+ think a*x + b = backoff, x := -log(rel-trx-len),
+ backoff is <BACKOFF_COUNT> + b at default trx length,
+ linear decrease to b at max trx length */
+ const int b = 5;
+ int new_backoff = (int)((BACKOFF_MULTIPLIER * -log10(rel_trx_len)) + b);
+ tl->transaction_length_backoff = new_backoff;
+ // printf("thread %d, backoff %d\n", tl->thread_local_counter, tl->transaction_length_backoff);
+ tl->linear_transaction_length_increment = rel_trx_len / new_backoff;
+}
+
+static inline double get_new_transaction_length(stm_thread_local_t *tl, bool aborts) {
+ const int multiplier = 2;
+ double previous = tl->relative_transaction_length;
+ double new;
+ if (aborts) {
+ new = previous / multiplier;
+ if (new < STM_MIN_RELATIVE_TRANSACTION_LENGTH) {
+ new = STM_MIN_RELATIVE_TRANSACTION_LENGTH;
+ }
+ set_backoff(tl, new);
+ } else if (tl->transaction_length_backoff == 0) {
+ // backoff counter is zero, exponential increase up to 1
+ new = previous * multiplier;
+ if (new > 1) {
+ new = 1;
+ }
+ if (tl->linear_transaction_length_increment != 0) {
+ // thread had to abort before: slow start
+ set_backoff(tl, new);
+ }
+ } else { // not abort and backoff != 0
+ // in backoff, linear increase up to 1
+ new = previous + tl->linear_transaction_length_increment;
+ if (new > 1) {
+ new = 1;
+ }
+ tl->transaction_length_backoff -= 1;
+ }
+ return new;
+}
+
+static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts) {
+ tl->relative_transaction_length = get_new_transaction_length(tl, aborts);
+}
+
+static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl) {
+ double relative_additional_length = tl->relative_transaction_length;
+ uintptr_t result =
+ (uintptr_t)(LARGE_FILL_MARK_NURSERY_BYTES * relative_additional_length);
+ // printf("%020" PRIxPTR "\n", result);
+ return result;
+}
+
/************************************************************/
-
static void setup_nursery(void)
{
assert(_STM_FAST_ALLOC <= NURSERY_SIZE);
@@ -500,6 +565,14 @@
pseg->pub.nursery_current = (stm_char *)_stm_nursery_start;
pseg->pub.nursery_mark -= nursery_used;
+ assert((pseg->transaction_state == TS_INEVITABLE) || !pseg->commit_if_not_atomic);
+ if (pseg->commit_if_not_atomic
+ && pseg->transaction_state == TS_INEVITABLE
+ && pseg->pub.running_thread->self_or_0_if_atomic != 0) {
+ // transaction is inevitable, not atomic, and commit has been signalled by waiting thread: commit immediately
+ pseg->pub.nursery_mark = 0;
+ }
+
/* free any object left from 'young_outside_nursery' */
if (!tree_is_cleared(pseg->young_outside_nursery)) {
wlog_t *item;
diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h
--- a/c8/stm/nursery.h
+++ b/c8/stm/nursery.h
@@ -56,4 +56,7 @@
static inline struct object_s *mark_loc(object_t *obj);
static inline bool _is_from_same_transaction(object_t *obj);
+static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts);
+static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl);
+
#endif
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -277,6 +277,12 @@
numbers automatically. */
tl->last_associated_segment_num = num + 1;
tl->thread_local_counter = ++thread_local_counters;
+
+ /* init adaptive transaction length mode */
+ tl->relative_transaction_length = STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH;
+ tl->transaction_length_backoff = 0;
+ tl->linear_transaction_length_increment = 0;
+
*_get_cpth(tl) = pthread_self();
_init_shadow_stack(tl);
set_gs_register(get_segment_base(num + 1));
diff --git a/c8/stm/sync.c b/c8/stm/sync.c
--- a/c8/stm/sync.c
+++ b/c8/stm/sync.c
@@ -176,6 +176,18 @@
/************************************************************/
+#if 0
+static uint8_t number_of_segments_in_use(void) {
+ uint8_t result = 0;
+ int num;
+ for (num = 1; num < NB_SEGMENTS; num++) {
+ if (sync_ctl.in_use1[num] > 0) {
+ result++;
+ }
+ }
+ return result;
+}
+#endif
#if 0
void stm_wait_for_current_inevitable_transaction(void)
@@ -202,7 +214,6 @@
}
#endif
-
static void acquire_thread_segment(stm_thread_local_t *tl)
{
/* This function acquires a segment for the currently running thread,
@@ -293,6 +304,19 @@
return false;
}
+static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void)
+{
+ struct stm_priv_segment_info_s* segment;
+ int num;
+ for (num = 1; num < NB_SEGMENTS; num++) {
+ segment = get_priv_segment(num);
+ if (segment->transaction_state == TS_INEVITABLE) {
+ return segment;
+ }
+ }
+ return 0;
+}
+
__attribute__((unused))
static bool _seems_to_be_running_transaction(void)
{
diff --git a/c8/stm/sync.h b/c8/stm/sync.h
--- a/c8/stm/sync.h
+++ b/c8/stm/sync.h
@@ -29,6 +29,7 @@
static void release_thread_segment(stm_thread_local_t *tl);
static void soon_finished_or_inevitable_thread_segment(void);
static bool any_soon_finished_or_inevitable_thread_segment(void);
+static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void);
enum sync_type_e {
STOP_OTHERS_UNTIL_MUTEX_UNLOCK,
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -88,6 +88,13 @@
struct stm_thread_local_s *prev, *next;
intptr_t self_or_0_if_atomic;
void *creating_pthread[2];
+ /* == adaptive single thread mode == */
+ /* factor that is multiplied with max transaction length before the start of the next transaction on this thread */
+ double relative_transaction_length;
+ /* when zero, transaction length may increase exponentially, otherwise transaction length may only increase linearly. is (re-)set to some value upon abort and counted down until zero upon successful validation. */
+ int transaction_length_backoff;
+ /* during the backoff, transaction length may increase linearly by this increment on every successful validation */
+ double linear_transaction_length_increment;
} stm_thread_local_t;
@@ -202,7 +209,7 @@
/* ==================== PUBLIC API ==================== */
/* Number of segments (i.e. how many transactions can be executed in
- parallel, in maximum). If you try to start transactions in more
+ parallel, at maximum). If you try to start transactions in more
threads than the number of segments, it will block, waiting for the
next segment to become free.
*/
@@ -464,14 +471,6 @@
return ((intptr_t)STM_SEGMENT->nursery_current >=
(intptr_t)STM_SEGMENT->nursery_mark);
}
-extern uintptr_t stm_fill_mark_nursery_bytes;
-/* ^^^ at the start of a transaction, 'nursery_mark' is initialized to
- 'stm_fill_mark_nursery_bytes' inside the nursery. This value can
- be larger than the nursery; every minor collection shifts the
- current 'nursery_mark' down by one nursery-size. After an abort
- and restart, 'nursery_mark' is set to ~90% of the value it reached
- in the last attempt.
-*/
/* "atomic" transaction: a transaction where stm_should_break_transaction()
always returns false, and where stm_leave_transactional_zone() never
@@ -575,21 +574,49 @@
STM_GC_MAJOR_START,
STM_GC_MAJOR_DONE,
+ /* execution duration profiling events */
+ STM_WARMUP_COMPLETE,
+
+ STM_DURATION_START_TRX,
+ STM_DURATION_WRITE_GC_ONLY,
+ STM_DURATION_WRITE_SLOWPATH,
+ STM_DURATION_VALIDATION,
+ STM_DURATION_CREATE_CLE,
+ STM_DURATION_COMMIT_EXCEPT_GC,
+ STM_DURATION_MINOR_GC,
+ STM_DURATION_MAJOR_GC_LOG_ONLY,
+ STM_DURATION_MAJOR_GC_FULL,
+
+ STM_SINGLE_THREAD_MODE_ON,
+ STM_SINGLE_THREAD_MODE_OFF,
+ STM_SINGLE_THREAD_MODE_ADAPTIVE,
+
_STM_EVENT_N
};
-#define STM_EVENT_NAMES \
- "transaction start", \
- "transaction commit", \
- "transaction abort", \
- "contention write read", \
- "wait free segment", \
- "wait other inevitable", \
- "wait done", \
- "gc minor start", \
- "gc minor done", \
- "gc major start", \
- "gc major done"
+#define STM_EVENT_NAMES \
+ "transaction start", \
+ "transaction commit", \
+ "transaction abort", \
+ "contention write read", \
+ "wait free segment", \
+ "wait other inevitable", \
+ "wait done", \
+ "gc minor start", \
+ "gc minor done", \
+ "gc major start", \
+ "gc major done", \
+ /* names of duration events */ \
+ "marks completion of benchmark warm up phase", \
+ "duration of transaction start", \
+ "duration of gc due to write", \
+ "duration of write slowpath", \
+ "duration of validation", \
+ "duration of commit log entry creation", \
+ "duration of commit except gc", \
+ "duration of minor gc", \
+ "duration of major gc doing log clean up only", \
+ "duration of full major gc", \
+ "single thread mode on", \
+ "single thread mode off", \
+ "single thread mode adaptive"
/* The markers pushed in the shadowstack are an odd number followed by a
regular object pointer. */
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -45,7 +45,6 @@
} stm_thread_local_t;
char *stm_object_pages;
-uintptr_t stm_fill_mark_nursery_bytes;
void stm_read(object_t *obj);
/*void stm_write(object_t *obj); use _checked_stm_write() instead */
@@ -671,7 +670,7 @@
undef_macros=['NDEBUG'],
include_dirs=[parent_dir],
extra_compile_args=['-g', '-O0', '-Werror', '-Wall'], #, '-ferror-limit=5'],
- extra_link_args=['-g', '-lrt'],
+ extra_link_args=['-g', '-lrt', '-lm'],
force_generic_engine=True)
diff --git a/c8/test/test_basic.py b/c8/test/test_basic.py
--- a/c8/test/test_basic.py
+++ b/c8/test/test_basic.py
@@ -736,6 +736,7 @@
self.check_char_everywhere(lp1, 'X')
def test_stm_should_break_transaction_1(self):
+ py.test.skip("replaced by tcp logic")
lib.stm_fill_mark_nursery_bytes = 100
#
self.start_transaction()
@@ -772,6 +773,7 @@
self.commit_transaction()
def test_stm_should_break_transaction_2(self):
+ py.test.skip("replaced by tcp logic")
lib.stm_fill_mark_nursery_bytes = 10000000
#
n = 10000000
diff --git a/gcc-seg-gs/README.txt b/gcc-seg-gs/README.txt
--- a/gcc-seg-gs/README.txt
+++ b/gcc-seg-gs/README.txt
@@ -8,9 +8,8 @@
compile the standard gcc. Of course, it is likely that gcc 6.1 will
soon be available from your Linux distribution directly.
-Note that with gcc 6.1, you no longer need gcc-5.1.0-patch.diff, and you
-should not need the "-fno-*" options either (but we didn't check that
-yet).
+Note that with gcc 6.1, you no longer need gcc-5.1.0-patch.diff, but you
+still need the "-fno-*" options.
More information about the pypy-commit
mailing list