[pypy-commit] stmgc c7-refactor: In-progress: another refactoring simplifying various things, after discovering that we
arigo
noreply at buildbot.pypy.org
Sun Feb 23 22:21:14 CET 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: c7-refactor
Changeset: r817:8ee070f3575c
Date: 2014-02-23 22:21 +0100
http://bitbucket.org/pypy/stmgc/changeset/8ee070f3575c/
Log: In-progress: another refactoring simplifying various things, after
discovering that we can after all run a minor collection after each
transaction... At least running about 30'000 opcodes in PyPy
consumes typically 400 KB of nursery, and lowering the nursery size
from 4 MB down to 400 KB has only a 1.5% performance impact.
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -12,6 +12,9 @@
void _stm_write_slowpath(object_t *obj)
{
assert(_running_transaction());
+ assert(!_is_in_nursery(obj));
+ abort();//...
+#if 0
/* for old objects from the same transaction, we are done now */
if (obj_from_same_transaction(obj)) {
@@ -51,9 +54,10 @@
/* claim the write-lock for this object */
retry:;
- uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - READMARKER_START;
+ uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
uint8_t lock_num = STM_PSEGMENT->write_lock_num;
uint8_t prev_owner;
+ assert((intptr_t)lock_idx >= 0);
prev_owner = __sync_val_compare_and_swap(&write_locks[lock_idx],
0, lock_num);
@@ -73,6 +77,7 @@
assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER_CALLED));
obj->stm_flags |= GCFLAG_WRITE_BARRIER_CALLED;
LIST_APPEND(STM_PSEGMENT->modified_objects, obj);
+#endif
}
static void reset_transaction_read_version(void)
@@ -137,13 +142,11 @@
reset_transaction_read_version();
}
- STM_PSEGMENT->min_read_version_outside_nursery =
- STM_SEGMENT->transaction_read_version;
+ assert(list_is_empty(STM_PSEGMENT->modified_old_objects));
- assert(list_is_empty(STM_PSEGMENT->modified_objects));
- assert(list_is_empty(STM_PSEGMENT->creation_markers));
-
- align_nursery_at_transaction_start();
+#ifdef STM_TESTS
+ check_nursery_at_transaction_start();
+#endif
}
@@ -158,8 +161,6 @@
long remote_num = 1 - STM_SEGMENT->segment_num;
char *remote_base = get_segment_base(remote_num);
uint8_t remote_version = get_segment(remote_num)->transaction_read_version;
- uint8_t remote_min_outside_nursery =
- get_priv_segment(remote_num)->min_read_version_outside_nursery;
switch (get_priv_segment(remote_num)->transaction_state) {
case TS_NONE:
@@ -169,11 +170,10 @@
}
LIST_FOREACH_R(
- STM_PSEGMENT->modified_objects,
+ STM_PSEGMENT->modified_old_objects,
object_t * /*item*/,
({
- if (was_read_remote(remote_base, item, remote_version,
- remote_min_outside_nursery)) {
+ if (was_read_remote(remote_base, item, remote_version)) {
/* A write-read conflict! */
contention_management(remote_num, false);
@@ -196,25 +196,24 @@
get_priv_segment(remote_num)->transaction_state == TS_INEVITABLE);
LIST_FOREACH_R(
- STM_PSEGMENT->modified_objects,
+ STM_PSEGMENT->modified_old_objects,
object_t * /*item*/,
({
if (remote_active) {
assert(!was_read_remote(remote_base, item,
- get_segment(remote_num)->transaction_read_version,
- get_priv_segment(remote_num)->
- min_read_version_outside_nursery));
+ get_segment(remote_num)->transaction_read_version));
}
/* clear the write-lock (note that this runs with all other
threads paused, so no need to be careful about ordering) */
- uintptr_t lock_idx = (((uintptr_t)item) >> 4) - READMARKER_START;
+ uintptr_t lock_idx = (((uintptr_t)item) >> 4) - WRITELOCK_START;
+ assert((intptr_t)lock_idx >= 0);
assert(write_locks[lock_idx] == STM_PSEGMENT->write_lock_num);
write_locks[lock_idx] = 0;
- /* remove again the WRITE_BARRIER_CALLED flag */
- assert(item->stm_flags & GCFLAG_WRITE_BARRIER_CALLED);
- item->stm_flags &= ~GCFLAG_WRITE_BARRIER_CALLED;
+ /* set again the WRITE_BARRIER flag */
+ assert((item->stm_flags & GCFLAG_WRITE_BARRIER) == 0);
+ item->stm_flags |= GCFLAG_WRITE_BARRIER;
/* copy the modified object to the other segment */
char *src = REAL_ADDRESS(local_base, item);
@@ -223,7 +222,7 @@
memcpy(dst, src, size);
}));
- list_clear(STM_PSEGMENT->modified_objects);
+ list_clear(STM_PSEGMENT->modified_old_objects);
}
static void _finish_transaction(void)
@@ -232,29 +231,23 @@
release_thread_segment(tl);
STM_PSEGMENT->safe_point = SP_NO_TRANSACTION;
STM_PSEGMENT->transaction_state = TS_NONE;
- list_clear(STM_PSEGMENT->old_objects_pointing_to_young);
+ if (STM_PSEGMENT->overflow_objects_pointing_to_nursery != NULL) {
+ list_free(STM_PSEGMENT->overflow_objects_pointing_to_nursery);
+ STM_PSEGMENT->overflow_objects_pointing_to_nursery = NULL;
+ }
}
void stm_commit_transaction(void)
{
+ minor_collection();
+
mutex_lock();
assert(STM_PSEGMENT->safe_point == SP_RUNNING);
STM_PSEGMENT->safe_point = SP_SAFE_POINT_CAN_COLLECT;
restart:
- switch (STM_PSEGMENT->transaction_state) {
-
- case TS_REGULAR:
- case TS_INEVITABLE:
- break;
-
- case TS_MUST_ABORT:
- abort_with_mutex();
-
- default:
- assert(!"commit: bad transaction_state");
- }
+ abort_if_needed();
/* wait until the other thread is at a safe-point */
if (!try_wait_for_other_safe_points(SP_SAFE_POINT_CANNOT_COLLECT))
@@ -275,10 +268,11 @@
/* copy modified object versions to other threads */
push_modified_to_other_segments();
- /* reset the creation markers, and if necessary (i.e. if the page the
- data is on is not SHARED) copy the data to other threads. The
- hope is that it's rarely necessary. */
- reset_all_creation_markers_and_push_created_data();
+ /* update 'overflow_number' if needed */
+ if (STM_PSEGMENT->overflow_objects_pointing_to_nursery != NULL) {
+ highest_overflow_number += GCFLAG_OVERFLOW_NUMBER_bit0;
+ STM_PSEGMENT->overflow_number = highest_overflow_number;
+ }
/* done */
_finish_transaction();
@@ -297,6 +291,8 @@
static void reset_modified_from_other_segments(void)
{
+ abort();//...
+#if 0
/* pull the right versions from other threads in order
to reset our pages as part of an abort */
long remote_num = 1 - STM_SEGMENT->segment_num;
@@ -304,7 +300,7 @@
char *remote_base = get_segment_base(remote_num);
LIST_FOREACH_R(
- STM_PSEGMENT->modified_objects,
+ STM_PSEGMENT->modified_old_objects,
object_t * /*item*/,
({
/* all objects in 'modified_objects' have this flag */
@@ -331,12 +327,14 @@
write_fence();
/* clear the write-lock */
- uintptr_t lock_idx = (((uintptr_t)item) >> 4) - READMARKER_START;
+ uintptr_t lock_idx = (((uintptr_t)item) >> 4) - WRITELOCK_START;
+ assert((intptr_t)lock_idx >= 0);
assert(write_locks[lock_idx]);
write_locks[lock_idx] = 0;
}));
list_clear(STM_PSEGMENT->modified_objects);
+#endif
}
static void abort_with_mutex(void)
@@ -356,8 +354,6 @@
/* reset all the modified objects (incl. re-adding GCFLAG_WRITE_BARRIER) */
reset_modified_from_other_segments();
- reset_all_creation_markers();
-
stm_jmpbuf_t *jmpbuf_ptr = STM_SEGMENT->jmpbuf_ptr;
stm_thread_local_t *tl = STM_SEGMENT->running_thread;
tl->shadowstack = STM_PSEGMENT->shadowstack_at_start_of_transaction;
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -11,8 +11,8 @@
#define NB_PAGES (1500*256) // 1500MB
#define NB_SEGMENTS 2
+#define NB_SEGMENTS_MAX 240 /* don't increase NB_SEGMENTS past this */
#define MAP_PAGES_FLAGS (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE)
-#define LARGE_OBJECT_WORDS 36
#define NB_NURSERY_PAGES 1024 // 4MB
#define TOTAL_MEMORY (NB_PAGES * 4096UL * NB_SEGMENTS)
@@ -25,23 +25,31 @@
#define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
#define NB_READMARKER_PAGES (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
-#define CREATMARKER_START ((FIRST_OBJECT_PAGE * 4096UL) >> 8)
-#define FIRST_CREATMARKER_PAGE (CREATMARKER_START / 4096UL)
+#define WRITELOCK_START ((END_NURSERY_PAGE * 4096UL) >> 4)
+#define WRITELOCK_END READMARKER_END
-enum {
- /* this flag is not set on most objects. when stm_write() is called
- on an object that is not from the current transaction, then
- _stm_write_slowpath() is called, and then the flag is set to
- say "called once already, no need to call again". */
- GCFLAG_WRITE_BARRIER_CALLED = _STM_GCFLAG_WRITE_BARRIER_CALLED,
- /* allocated by gcpage.c in uniformly-sized pages of small objects */
+enum /* stm_flags */ {
+ /* This flag is set on non-nursery objects. It forces stm_write()
+ to call _stm_write_slowpath().
+ */
+ GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
+
+ /* This flag is set by gcpage.c for all objects living in
+ uniformly-sized pages of small objects.
+ */
GCFLAG_SMALL_UNIFORM = 0x02,
+
+ /* All remaining bits of the 32-bit 'stm_flags' field are taken by
+ the "overflow number". This is a number that identifies the
+ "overflow objects" from the current transaction among all old
+ objects. More precisely, overflow objects are objects from the
+ current transaction that have been flushed out of the nursery,
+ which occurs if the same transaction allocates too many objects.
+ */
+ GCFLAG_OVERFLOW_NUMBER_bit0 = 0x04 /* must be last */
};
-#define CROSS_PAGE_BOUNDARY(start, stop) \
- (((uintptr_t)(start)) / 4096UL != ((uintptr_t)(stop)) / 4096UL)
-
/************************************************************/
@@ -52,44 +60,61 @@
struct stm_priv_segment_info_s {
struct stm_segment_info_s pub;
- struct list_s *old_objects_pointing_to_young;
- struct list_s *modified_objects;
- struct list_s *creation_markers;
+
+ /* List of overflowed objects (from the same transaction but outside
+ the nursery) on which the write-barrier was triggered, so that
+ they likely contain a pointer to a nursery object */
+ struct list_s *overflow_objects_pointing_to_nursery;
+
+ /* List of old objects (older than the current transaction) that the
+ current transaction attempts to modify */
+ struct list_s *modified_old_objects;
+
+ /* Start time: to know approximately for how long a transaction has
+ been running, in contention management */
uint64_t start_time;
+
+ /* This is the number stored in the overflowed objects (a multiple of
+ GCFLAG_OVERFLOW_NUMBER_bit0). It is incremented when the
+ transaction is done, but only if we actually overflowed any
+ object; otherwise, no object has got this number. */
+ uint32_t overflow_number;
+
+ /* The marker stored in the global 'write_locks' array to mean
+ "this segment has modified this old object". */
uint8_t write_lock_num;
- uint8_t safe_point; /* one of the SP_xxx constants */
- uint8_t transaction_state; /* one of the TS_xxx constants */
- uint8_t min_read_version_outside_nursery; /* see was_read_remote() */
- uintptr_t real_nursery_section_end;
+
+ /* The thread's safe-point state, one of the SP_xxx constants */
+ uint8_t safe_point;
+
+ /* The transaction status, one of the TS_xxx constants */
+ uint8_t transaction_state;
+
+ /* In case of abort, we restore the 'shadowstack' field. */
object_t **shadowstack_at_start_of_transaction;
};
-enum {
+enum /* safe_point */ {
SP_NO_TRANSACTION=0,
SP_RUNNING,
SP_SAFE_POINT_CANNOT_COLLECT,
SP_SAFE_POINT_CAN_COLLECT,
};
-enum {
+enum /* transaction_state */ {
TS_NONE=0,
TS_REGULAR,
TS_INEVITABLE,
TS_MUST_ABORT,
};
-enum { /* for stm_creation_marker_t */
- CM_NOT_CURRENT_TRANSACTION = 0x00,
- CM_CURRENT_TRANSACTION_OUTSIDE_NURSERY = 0x01,
- CM_CURRENT_TRANSACTION_IN_NURSERY = 0xff,
-};
static char *stm_object_pages;
-static stm_thread_local_t *stm_thread_locals = NULL;
+static stm_thread_local_t *stm_all_thread_locals = NULL;
#ifdef STM_TESTS
static char *stm_other_pages;
#endif
-static uint8_t write_locks[READMARKER_END - READMARKER_START];
+static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START];
#define REAL_ADDRESS(segment_base, src) ((segment_base) + (uintptr_t)(src))
@@ -113,14 +138,18 @@
static bool _is_tl_registered(stm_thread_local_t *tl);
static bool _running_transaction(void);
-static inline bool obj_from_same_transaction(object_t *obj) {
- return ((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm !=
- CM_NOT_CURRENT_TRANSACTION;
-}
-
static void teardown_core(void);
static void abort_with_mutex(void) __attribute__((noreturn));
+static inline bool was_read_remote(char *base, object_t *obj,
+ uint8_t other_transaction_read_version)
+{
+ uint8_t rm = ((struct stm_read_marker_s *)
+ (base + (((uintptr_t)obj) >> 4)))->rm;
+ assert(rm <= other_transaction_read_version);
+ return rm == other_transaction_read_version;
+}
+
static inline void _duck(void) {
/* put a call to _duck() between two instructions that set 0 into
a %gs-prefixed address and that may otherwise be replaced with
@@ -129,3 +158,17 @@
llvmfix/no-memset-creation-with-addrspace.diff. */
asm("/* workaround for llvm bug */");
}
+
+static inline void abort_if_needed(void) {
+ switch (STM_PSEGMENT->transaction_state) {
+ case TS_REGULAR:
+ case TS_INEVITABLE:
+ break;
+
+ case TS_MUST_ABORT:
+ abort_with_mutex();
+
+ default:
+ assert(!"commit: bad transaction_state");
+ }
+}
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -19,8 +19,7 @@
static void teardown_gcpage(void)
{
- memset(small_alloc_shared, 0, sizeof(small_alloc_shared));
- memset(small_alloc_privtz, 0, sizeof(small_alloc_privtz));
+ memset(small_alloc, 0, sizeof(small_alloc));
free_uniform_pages = NULL;
}
@@ -56,11 +55,10 @@
return;
out_of_memory:
- stm_fatalerror("out of memory!\n");
+ stm_fatalerror("out of memory!\n"); /* XXX */
}
-static char *_allocate_small_slowpath(
- struct small_alloc_s small_alloc[], uint64_t size)
+static char *_allocate_small_slowpath(uint64_t size)
{
/* not thread-safe! Use only when holding the mutex */
assert(_has_mutex());
@@ -72,8 +70,10 @@
}
+#if 0
static char *allocate_outside_nursery_large(uint64_t size)
{
+ abort(); //XXX review
/* not thread-safe! Use only when holding the mutex */
assert(_has_mutex());
@@ -92,11 +92,9 @@
setup_N_pages(uninitialized_page_start, npages);
uninitialized_page_start += npages * 4096UL;
}
-
- assert(get_single_creation_marker((stm_char *)(addr - stm_object_pages))
- == 0);
return addr;
}
+#endif
object_t *_stm_allocate_old(ssize_t size_rounded_up)
{
diff --git a/c7/stm/gcpage.h b/c7/stm/gcpage.h
--- a/c7/stm/gcpage.h
+++ b/c7/stm/gcpage.h
@@ -22,35 +22,17 @@
contiguous range of unallocated objs */
};
-/* For each small request size, we have three independent chained lists
- of address ranges:
-
- - 'small_alloc_shared': ranges are within pages that are likely to be
- shared. We don't know for sure, because pages can be privatized
- by normal run of stm_write().
-
- - 'small_alloc_sh_old': moved from 'small_alloc_shared' when we're
- looking for a range with the creation_marker set; this collects
- the unsuitable ranges, i.e. the ones with already at least one
- object and no creation marker.
-
- - 'small_alloc_privtz': ranges are within pages that are privatized.
-*/
-static struct small_alloc_s small_alloc_shared[GC_N_SMALL_REQUESTS];
-static struct small_alloc_s small_alloc_sh_old[GC_N_SMALL_REQUESTS];
-static struct small_alloc_s small_alloc_privtz[GC_N_SMALL_REQUESTS];
+static struct small_alloc_s small_alloc[GC_N_SMALL_REQUESTS];
static char *free_uniform_pages;
static void setup_gcpage(void);
static void teardown_gcpage(void);
-static char *allocate_outside_nursery_large(uint64_t size);
+//static char *allocate_outside_nursery_large(uint64_t size);
-static char *_allocate_small_slowpath(
- struct small_alloc_s small_alloc[], uint64_t size);
+static char *_allocate_small_slowpath(uint64_t size);
-static inline char *allocate_outside_nursery_small(
- struct small_alloc_s small_alloc[], uint64_t size)
+static inline char *allocate_outside_nursery_small(uint64_t size)
{
uint64_t index = size / 8;
OPT_ASSERT(2 <= index);
@@ -58,7 +40,7 @@
char *result = small_alloc[index].next_object;
if (result == NULL)
- return _allocate_small_slowpath(small_alloc, size);
+ return _allocate_small_slowpath(size);
char *following;
if (small_alloc[index].range_last == result) {
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -38,35 +38,28 @@
bool _stm_was_read(object_t *obj)
{
return was_read_remote(STM_SEGMENT->segment_base, obj,
- STM_SEGMENT->transaction_read_version,
- STM_PSEGMENT->min_read_version_outside_nursery);
+ STM_SEGMENT->transaction_read_version);
}
bool _stm_was_written(object_t *obj)
{
- return !!((((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm |
- obj->stm_flags) & _STM_GCFLAG_WRITE_BARRIER_CALLED);
-}
-
-uint8_t _stm_creation_marker(object_t *obj)
-{
- return ((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm;
+ return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0;
}
#ifdef STM_TESTS
-object_t *_stm_enum_old_objects_pointing_to_young(void)
+object_t *_stm_enum_overflow_objects_pointing_to_nursery(void)
{
static long index = 0;
- struct list_s *lst = STM_PSEGMENT->old_objects_pointing_to_young;
+ struct list_s *lst = STM_PSEGMENT->overflow_objects_pointing_to_nursery;
if (index < list_count(lst))
return (object_t *)list_item(lst, index++);
index = 0;
return (object_t *)-1;
}
-object_t *_stm_enum_modified_objects(void)
+object_t *_stm_enum_modified_old_objects(void)
{
static long index = 0;
- struct list_s *lst = STM_PSEGMENT->modified_objects;
+ struct list_s *lst = STM_PSEGMENT->modified_old_objects;
if (index < list_count(lst))
return (object_t *)list_item(lst, index++);
index = 0;
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -4,61 +4,38 @@
/************************************************************/
+/* xxx later: divide the nursery into sections, and zero them
+ incrementally. For now we avoid the mess of maintaining a
+ description of which parts of the nursery are already zeroed
+ and which ones are not (caused by the fact that each
+ transaction fills up a different amount).
+*/
+
#define NURSERY_START (FIRST_NURSERY_PAGE * 4096UL)
#define NURSERY_SIZE (NB_NURSERY_PAGES * 4096UL)
+#define NURSERY_END (NURSERY_START + NURSERY_SIZE)
-/* an object larger than LARGE_OBJECT will never be allocated in
- the nursery. */
-#define LARGE_OBJECT (65*1024)
-
-/* the nursery is divided in "sections" this big. Each section is
- allocated to a single running thread. */
-#define NURSERY_SECTION_SIZE (128*1024)
-
-/* if objects are larger than this limit but smaller than LARGE_OBJECT,
- then they might be allocted outside sections but still in the nursery. */
-#define MEDIUM_OBJECT (6*1024)
-
-/* size in bytes of the "line". Should be equal to the line used by
- stm_creation_marker_t. */
-#define NURSERY_LINE 256
-
-/************************************************************/
-
-
-static union {
- struct {
- uint64_t used; /* number of bytes from the nursery used so far */
- uint64_t initial_value_of_used;
- };
- char reserved[64];
-} nursery_ctl __attribute__((aligned(64)));
-
-static struct list_s *old_objects_pointing_to_young;
+static uintptr_t _stm_nursery_start;
+uintptr_t _stm_nursery_end;
/************************************************************/
static void setup_nursery(void)
{
- assert(NURSERY_LINE == (1 << 8)); /* from stm_creation_marker_t */
- assert((NURSERY_SECTION_SIZE % NURSERY_LINE) == 0);
- assert(MEDIUM_OBJECT < LARGE_OBJECT);
- assert(LARGE_OBJECT < NURSERY_SECTION_SIZE);
- nursery_ctl.used = 0;
- old_objects_pointing_to_young = list_create();
+ assert(_STM_FAST_ALLOC <= NURSERY_SIZE);
+ _stm_nursery_start = NURSERY_START;
+ _stm_nursery_end = NURSERY_END;
}
static void teardown_nursery(void)
{
- list_free(old_objects_pointing_to_young);
- nursery_ctl.initial_value_of_used = 0;
}
static inline bool _is_in_nursery(object_t *obj)
{
assert((uintptr_t)obj >= NURSERY_START);
- return (uintptr_t)obj < NURSERY_START + NURSERY_SIZE;
+ return (uintptr_t)obj < NURSERY_END;
}
bool _stm_in_nursery(object_t *obj)
@@ -66,29 +43,12 @@
return _is_in_nursery(obj);
}
+#if 0
static bool _is_young(object_t *obj)
{
return _is_in_nursery(obj); /* for now */
}
-
-static inline bool was_read_remote(char *base, object_t *obj,
- uint8_t other_transaction_read_version,
- uint8_t min_read_version_outside_nursery)
-{
- uint8_t rm = ((struct stm_read_marker_s *)
- (base + (((uintptr_t)obj) >> 4)))->rm;
-
- assert(min_read_version_outside_nursery <=
- other_transaction_read_version);
- assert(rm <= other_transaction_read_version);
-
- if (_is_in_nursery(obj)) {
- return rm == other_transaction_read_version;
- }
- else {
- return rm >= min_read_version_outside_nursery;
- }
-}
+#endif
/************************************************************/
@@ -110,8 +70,10 @@
}
}
+#if 0
static void minor_trace_if_young(object_t **pobj)
{
+ abort(); //...
/* takes a normal pointer to a thread-local pointer to an object */
object_t *obj = *pobj;
if (obj == NULL)
@@ -145,7 +107,7 @@
which has a granularity of 256 bytes.
*/
size_t size = stmcb_size_rounded_up((struct object_s *)realobj);
- uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - READMARKER_START;
+ uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
uint8_t write_lock = write_locks[lock_idx];
object_t *nobj;
long i;
@@ -183,7 +145,7 @@
uintptr_t lastpage= (dataofs + size - 1) / 4096UL;
pages_privatize(pagenum, lastpage - pagenum + 1, false);
- lock_idx = (dataofs >> 4) - READMARKER_START;
+ lock_idx = (dataofs >> 4) - WRITELOCK_START;
assert(write_locks[lock_idx] == 0);
write_locks[lock_idx] = write_lock;
@@ -238,7 +200,7 @@
static void collect_roots_in_nursery(void)
{
- stm_thread_local_t *tl = stm_thread_locals;
+ stm_thread_local_t *tl = stm_all_thread_locals;
do {
object_t **current = tl->shadowstack;
object_t **base = tl->shadowstack_base;
@@ -246,7 +208,7 @@
minor_trace_if_young(current);
}
tl = tl->next;
- } while (tl != stm_thread_locals);
+ } while (tl != stm_all_thread_locals);
}
static void trace_and_drag_out_of_nursery(object_t *obj)
@@ -256,7 +218,7 @@
struct object_s *realobj =
(struct object_s *)REAL_ADDRESS(get_segment_base(i), obj);
- realobj->stm_flags &= ~GCFLAG_WRITE_BARRIER_CALLED;
+ realobj->stm_flags |= GCFLAG_WRITE_BARRIER;
stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
@@ -278,8 +240,9 @@
don't, it's because the same object was stored in several
segment's old_objects_pointing_to_young. It's fine to
ignore duplicates. */
- if ((obj->stm_flags & GCFLAG_WRITE_BARRIER_CALLED) == 0)
- continue;
+ abort();//...
+ //if ((obj->stm_flags & GCFLAG_WRITE_BARRIER_CALLED) == 0)
+ // continue;
/* The flag GCFLAG_WRITE_BARRIER_CALLED is going to be removed:
no live object should have this flag set after a nursery
@@ -294,13 +257,10 @@
static void reset_nursery(void)
{
+ abort();//...
/* reset the global amount-of-nursery-used-so-far */
nursery_ctl.used = nursery_ctl.initial_value_of_used;
- /* reset the write locks */
- memset(write_locks + ((NURSERY_START >> 4) - READMARKER_START),
- 0, NURSERY_SIZE >> 4);
-
long i;
for (i = 0; i < NB_SEGMENTS; i++) {
struct stm_priv_segment_info_s *other_pseg = get_priv_segment(i);
@@ -321,9 +281,10 @@
}
else if (other_pseg->pub.transaction_read_version < 0xff) {
other_pseg->pub.transaction_read_version++;
- assert(0 < other_pseg->min_read_version_outside_nursery &&
+ abort();//...
+ /*assert(0 < other_pseg->min_read_version_outside_nursery &&
other_pseg->min_read_version_outside_nursery
- < other_pseg->pub.transaction_read_version);
+ < other_pseg->pub.transaction_read_version);*/
}
else {
/* however, when the value 0xff is reached, we are stuck
@@ -338,7 +299,7 @@
if (old_end > NURSERY_START) {
char *creation_markers = REAL_ADDRESS(other_pseg->pub.segment_base,
NURSERY_START >> 8);
- assert(old_end <= NURSERY_START + NURSERY_SIZE);
+ assert(old_end <= NURSERY_END);
memset(creation_markers, 0, (old_end - NURSERY_START) >> 8);
}
else {
@@ -346,14 +307,17 @@
}
}
}
+#endif
-static void do_minor_collection(void)
+static void minor_collection(void)
{
- /* all other threads are paused in safe points during the whole
- minor collection */
+ assert(!_has_mutex());
+ abort_if_needed();
+
dprintf(("minor_collection\n"));
- assert(_has_mutex());
- assert(list_is_empty(old_objects_pointing_to_young));
+
+ abort();//...
+#if 0
/* List of what we need to do and invariants we need to preserve
-------------------------------------------------------------
@@ -390,142 +354,66 @@
reset_nursery();
pages_make_shared_again(FIRST_NURSERY_PAGE, NB_NURSERY_PAGES);
+#endif
}
-static void restore_nursery_section_end(uintptr_t prev_value)
-{
- __sync_bool_compare_and_swap(&STM_SEGMENT->v_nursery_section_end,
- prev_value,
- STM_PSEGMENT->real_nursery_section_end);
-}
-
-static void stm_minor_collection(uint64_t request_size)
-{
- /* Run a minor collection --- but only if we can't get 'request_size'
- bytes out of the nursery; if we can, no-op. */
- mutex_lock();
-
- assert(STM_PSEGMENT->safe_point == SP_RUNNING);
- STM_PSEGMENT->safe_point = SP_SAFE_POINT_CAN_COLLECT;
-
- restart:
- /* We just waited here, either from mutex_lock() or from cond_wait(),
- so we should check again if another thread did the minor
- collection itself */
- if (request_size <= NURSERY_SIZE - nursery_ctl.used)
- goto exit;
-
- if (!try_wait_for_other_safe_points(SP_SAFE_POINT_CAN_COLLECT))
- goto restart;
-
- /* now we can run our minor collection */
- do_minor_collection();
-
- exit:
- STM_PSEGMENT->safe_point = SP_RUNNING;
-
- mutex_unlock();
-}
-
void stm_collect(long level)
{
assert(level == 0);
- stm_minor_collection(-1);
+ minor_collection();
}
/************************************************************/
-#define NURSERY_ALIGN(bytes) \
- (((bytes) + NURSERY_LINE - 1) & ~(NURSERY_LINE - 1))
-static stm_char *allocate_from_nursery(uint64_t bytes)
-{
- /* may collect! */
- /* thread-safe; allocate a chunk of memory from the nursery */
- bytes = NURSERY_ALIGN(bytes);
- while (1) {
- uint64_t p = __sync_fetch_and_add(&nursery_ctl.used, bytes);
- if (LIKELY(p + bytes <= NURSERY_SIZE)) {
- return (stm_char *)(NURSERY_START + p);
- }
-
- /* nursery full! */
- stm_minor_collection(bytes);
- }
-}
-
-
-stm_char *_stm_allocate_slowpath(ssize_t size_rounded_up)
+object_t *_stm_allocate_slowpath(ssize_t size_rounded_up)
{
/* may collect! */
STM_SEGMENT->nursery_current -= size_rounded_up; /* restore correct val */
- if (_stm_collectable_safe_point())
- return (stm_char *)stm_allocate(size_rounded_up);
+ restart:
+ stm_safe_point();
- if (size_rounded_up < MEDIUM_OBJECT) {
- /* This is a small object. The current section is really full.
- Allocate the next section and initialize it with zeroes. */
- stm_char *p = allocate_from_nursery(NURSERY_SECTION_SIZE);
- STM_SEGMENT->nursery_current = p + size_rounded_up;
+ OPT_ASSERT(size_rounded_up >= 16);
+ OPT_ASSERT((size_rounded_up & 7) == 0);
+ OPT_ASSERT(size_rounded_up < _STM_FAST_ALLOC);
- /* Set v_nursery_section_end, but carefully: another thread may
- have forced it to be equal to NSE_SIGNAL. */
- uintptr_t end = (uintptr_t)p + NURSERY_SECTION_SIZE;
- uintptr_t prev_end = STM_PSEGMENT->real_nursery_section_end;
- STM_PSEGMENT->real_nursery_section_end = end;
- restore_nursery_section_end(prev_end);
-
- memset(REAL_ADDRESS(STM_SEGMENT->segment_base, p), 0,
- NURSERY_SECTION_SIZE);
-
- /* Also fill the corresponding creation markers with 0xff. */
- set_creation_markers(p, NURSERY_SECTION_SIZE,
- CM_CURRENT_TRANSACTION_IN_NURSERY);
- return p;
+ stm_char *p = STM_SEGMENT->nursery_current;
+ stm_char *end = p + size_rounded_up;
+ if ((uintptr_t)end <= NURSERY_END) {
+ STM_SEGMENT->nursery_current = end;
+ return (object_t *)p;
}
- if (size_rounded_up < LARGE_OBJECT) {
- /* A medium-sized object that doesn't fit into the current
- nursery section. Note that if by chance it does fit, then
- _stm_allocate_slowpath() is not even called. This case here
- is to prevent too much of the nursery to remain not used
- just because we tried to allocate a medium-sized object:
- doing so doesn't end the current section. */
- stm_char *p = allocate_from_nursery(size_rounded_up);
- memset(REAL_ADDRESS(STM_SEGMENT->segment_base, p), 0,
- size_rounded_up);
- set_single_creation_marker(p, CM_CURRENT_TRANSACTION_IN_NURSERY);
- return p;
- }
-
- abort();
+ minor_collection();
+ goto restart;
}
-static void align_nursery_at_transaction_start(void)
+object_t *_stm_allocate_external(ssize_t size_rounded_up)
{
- /* When the transaction starts, we must align the 'nursery_current'
- and set creation markers for the part of the section the follows.
- */
- uintptr_t c = (uintptr_t)STM_SEGMENT->nursery_current;
- c = NURSERY_ALIGN(c);
- STM_SEGMENT->nursery_current = (stm_char *)c;
-
- uint64_t size = STM_PSEGMENT->real_nursery_section_end - c;
- if (size > 0) {
- set_creation_markers((stm_char *)c, size,
- CM_CURRENT_TRANSACTION_IN_NURSERY);
- }
+ abort();//...
}
#ifdef STM_TESTS
void _stm_set_nursery_free_count(uint64_t free_count)
{
- assert(free_count == NURSERY_ALIGN(free_count));
- assert(nursery_ctl.used <= NURSERY_SIZE - free_count);
- nursery_ctl.used = NURSERY_SIZE - free_count;
- nursery_ctl.initial_value_of_used = nursery_ctl.used;
+ assert(free_count <= NURSERY_SIZE);
+ _stm_nursery_start = NURSERY_END - free_count;
+
+ long i;
+ for (i = 0; i < NB_SEGMENTS; i++) {
+ if ((uintptr_t)get_segment(i)->nursery_current < _stm_nursery_start)
+ get_segment(i)->nursery_current = (stm_char *)_stm_nursery_start;
+ }
}
#endif
+
+static void check_nursery_at_transaction_start(void)
+{
+ assert((uintptr_t)STM_SEGMENT->nursery_current == _stm_nursery_start);
+ uintptr_t i;
+ for (i = 0; i < _stm_nursery_end - _stm_nursery_start; i++)
+ assert(STM_SEGMENT->nursery_current[i] == 0);
+}
diff --git a/c7/stm/nursery.h b/c7/stm/nursery.h
--- a/c7/stm/nursery.h
+++ b/c7/stm/nursery.h
@@ -1,40 +1,8 @@
-/* special values of 'v_nursery_section_end' */
-#define NSE_SIGNAL 1
-#define NSE_SIGNAL_DONE 2
+/* '_stm_nursery_section_end' is either NURSERY_END or NSE_SIGNAL */
+#define NSE_SIGNAL _STM_NSE_SIGNAL
-#if _STM_NSE_SIGNAL != NSE_SIGNAL
-# error "adapt _STM_NSE_SIGNAL"
-#endif
-/* Rules for 'v_nursery_section_end':
+static uint32_t highest_overflow_number;
- - Its main purpose is to be read by the owning thread in stm_allocate().
-
- - The owning thread can change its value without acquiring the mutex,
- but it must do so carefully, with a compare_and_swap.
-
- - If a different thread has the mutex, it can force the field to the
- value NSE_SIGNAL or NSE_SIGNAL_DONE with a regular write. This should
- not be hidden by the compare_and_swap done by the owning thread:
- even if it occurs just before or just after a compare_and_swap,
- the end result is that the special value NSE_SIGNAL(_DONE) is still
- in the field.
-
- - When the owning thread sees NSE_SIGNAL, it must signal and wait until
- the other thread restores the value to NSE_SIGNAL_DONE. When the
- owning thread sees NSE_SIGNAL_DONE, it can replace it, again with
- compare_and_swap, with the real value.
-
- - This should in theory be a volatile field, because it can be read
- from stm_allocate() while at the same time being changed to the value
- NSE_SIGNAL by another thread. In practice, making it volatile has
- probably just a small negative impact on performance for no good reason.
-*/
-
-static void align_nursery_at_transaction_start(void);
-static void restore_nursery_section_end(uintptr_t prev_value);
-
-static inline bool was_read_remote(char *base, object_t *obj,
- uint8_t other_transaction_read_version,
- uint8_t min_read_version_outside_nursery);
+static void check_nursery_at_transaction_start(void) __attribute__((unused));
diff --git a/c7/stm/pagecopy.c b/c7/stm/pagecopy.c
--- a/c7/stm/pagecopy.c
+++ b/c7/stm/pagecopy.c
@@ -28,11 +28,13 @@
}
}
+#if 0
static void pagecopy_256(void *dest, const void *src)
{
PAGECOPY_128(dest, src );
PAGECOPY_128(dest + 128, src + 128);
}
+#endif
#if 0 /* XXX enable if detected on the cpu */
static void pagecopy_ymm8(void *dest, const void *src)
diff --git a/c7/stm/pagecopy.h b/c7/stm/pagecopy.h
--- a/c7/stm/pagecopy.h
+++ b/c7/stm/pagecopy.h
@@ -1,3 +1,2 @@
static void pagecopy(void *dest, const void *src); // 4096 bytes
-static void pagecopy_256(void *dest, const void *src); // 256 bytes
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -23,6 +23,7 @@
flag_page_private[pagenum + i] = SHARED_PAGE;
}
+#if 0
static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count)
{
/* Same as pages_initialize_shared(), but tries hard to minimize the
@@ -42,6 +43,7 @@
pages_initialize_shared(pagenum + start, i - start);
}
}
+#endif
static void privatize_range_and_unlock(uintptr_t pagenum, uintptr_t count,
bool full)
@@ -128,102 +130,7 @@
}
}
-static void set_creation_markers(stm_char *p, uint64_t size, int newvalue)
-{
- /* Set the creation markers to 'newvalue' for all lines from 'p' to
- 'p+size'. Both p and size should be aligned to the line size: 256. */
-
- assert((((uintptr_t)p) & 255) == 0);
- assert((size & 255) == 0);
- assert(size > 0);
-
- uintptr_t cmaddr = ((uintptr_t)p) >> 8;
- LIST_APPEND(STM_PSEGMENT->creation_markers, cmaddr);
-
- char *addr = REAL_ADDRESS(STM_SEGMENT->segment_base, cmaddr);
- memset(addr, newvalue, size >> 8);
-}
-
-static uint8_t get_single_creation_marker(stm_char *p)
-{
- uintptr_t cmaddr = ((uintptr_t)p) >> 8;
- return ((stm_creation_marker_t *)cmaddr)->cm;
-}
-
-static void set_single_creation_marker(stm_char *p, int newvalue)
-{
- uintptr_t cmaddr = ((uintptr_t)p) >> 8;
- ((stm_creation_marker_t *)cmaddr)->cm = newvalue;
- LIST_APPEND(STM_PSEGMENT->creation_markers, cmaddr);
-}
-
-static void reset_all_creation_markers(void)
-{
- /* Note that the page 'NB_PAGES - 1' is not actually used. This
- ensures that the creation markers always end with some zeroes.
- We reset the markers 8 at a time, by writing null integers
- until we reach a place that is already null.
- */
- LIST_FOREACH_R(
- STM_PSEGMENT->creation_markers,
- uintptr_t /*item*/,
- ({
- TLPREFIX uint64_t *p = (TLPREFIX uint64_t *)(item & ~7);
- while (*p != 0)
- *p++ = 0;
- }));
-
- list_clear(STM_PSEGMENT->creation_markers);
-}
-
-static void reset_all_creation_markers_and_push_created_data(void)
-{
- /* This is like reset_all_creation_markers(), but additionally
- it looks for markers in non-SHARED pages, and pushes the
- corresponding data (in 256-bytes blocks) to other threads.
- */
-#if NB_SEGMENTS != 2
-# error "The logic in this function only works with two segments"
-#endif
-
- char *local_base = STM_SEGMENT->segment_base;
- long remote_num = 1 - STM_SEGMENT->segment_num;
- char *remote_base = get_segment_base(remote_num);
-
- /* this logic assumes that creation markers are in 256-bytes blocks,
- and pages are 4096 bytes, so creation markers are handled by groups
- of 16 --- which is two 8-bytes uint64_t. */
-
- LIST_FOREACH_R(
- STM_PSEGMENT->creation_markers,
- uintptr_t /*item*/,
- ({
- TLPREFIX uint64_t *p = (TLPREFIX uint64_t *)(item & ~15);
- while (p[0] != 0 || p[1] != 0) {
-
- uint64_t pagenum = ((uint64_t)p) >> 4;
- if (flag_page_private[pagenum] != SHARED_PAGE) {
- /* copying needed */
- uint64_t dataofs = ((uint64_t)p) << 8;
- stm_char *start = (stm_char *)p;
- stm_char *stop = start + 16;
- while (start < stop) {
- if (*start++ != 0) {
- pagecopy_256(remote_base + dataofs,
- local_base + dataofs);
- }
- dataofs += 256;
- }
- }
- p[0] = 0; _duck();
- p[1] = 0;
- p += 2;
- }
- }));
-
- list_clear(STM_PSEGMENT->creation_markers);
-}
-
+#if 0
static bool is_in_shared_pages(object_t *obj)
{
uintptr_t first_page = ((uintptr_t)obj) / 4096UL;
@@ -234,11 +141,11 @@
ssize_t obj_size = stmcb_size_rounded_up(
(struct object_s *)REAL_ADDRESS(stm_object_pages, obj));
- uintptr_t end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL;
- /* that's the page *following* the last page with the object */
+ uintptr_t last_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
- while (first_page < end_page)
+ while (first_page <= last_page)
if (flag_page_private[first_page++] != SHARED_PAGE)
return false;
return true;
}
+#endif
diff --git a/c7/stm/pages.h b/c7/stm/pages.h
--- a/c7/stm/pages.h
+++ b/c7/stm/pages.h
@@ -1,5 +1,5 @@
-enum {
+enum /* flag_page_private */ {
/* The page is not in use. Assume that each segment sees its own copy. */
FREE_PAGE=0,
@@ -12,15 +12,13 @@
/* Page is private for each segment. */
PRIVATE_PAGE,
-
-}; /* used for flag_page_private */
-
+};
static uint8_t flag_page_private[NB_PAGES];
static void _pages_privatize(uintptr_t pagenum, uintptr_t count, bool full);
static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
-static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count);
+//static void pages_make_shared_again(uintptr_t pagenum, uintptr_t count);
inline static void pages_privatize(uintptr_t pagenum, uintptr_t count,
bool full) {
@@ -32,9 +30,4 @@
_pages_privatize(pagenum, count, full);
}
-static void set_creation_markers(stm_char *p, uint64_t size, int newvalue);
-static uint8_t get_single_creation_marker(stm_char *p) __attribute__((unused));
-static void set_single_creation_marker(stm_char *p, int newvalue);
-static void reset_all_creation_markers(void);
-static void reset_all_creation_markers_and_push_created_data(void);
-static bool is_in_shared_pages(object_t *obj);
+//static bool is_in_shared_pages(object_t *obj);
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -5,14 +5,8 @@
void stm_setup(void)
{
-#if 0
- _stm_reset_shared_lock();
- _stm_reset_pages();
-
- inevitable_lock = 0;
-#endif
-
/* Check that some values are acceptable */
+ assert(NB_SEGMENTS <= NB_SEGMENTS_MAX);
assert(4096 <= ((uintptr_t)STM_SEGMENT));
assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT);
assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192);
@@ -21,9 +15,6 @@
assert(READMARKER_START < READMARKER_END);
assert(READMARKER_END <= 4096UL * FIRST_OBJECT_PAGE);
assert(FIRST_OBJECT_PAGE < NB_PAGES);
- assert(CREATMARKER_START >= 8192);
- assert(2 <= FIRST_CREATMARKER_PAGE);
- assert(FIRST_CREATMARKER_PAGE <= FIRST_READMARKER_PAGE);
assert((NB_PAGES * 4096UL) >> 8 <= (FIRST_OBJECT_PAGE * 4096UL) >> 4);
assert((END_NURSERY_PAGE * 4096UL) >> 8 <=
(FIRST_READMARKER_PAGE * 4096UL));
@@ -53,10 +44,10 @@
memset(REAL_ADDRESS(segment_base, STM_PSEGMENT), 0,
sizeof(*STM_PSEGMENT));
- /* Pages in range(2, FIRST_CREATMARKER_PAGE) are never used */
- if (FIRST_CREATMARKER_PAGE > 2)
+ /* Pages in range(2, FIRST_READMARKER_PAGE) are never used */
+ if (FIRST_READMARKER_PAGE > 2)
mprotect(segment_base + 8192,
- (FIRST_CREATMARKER_PAGE - 2) * 4096UL,
+ (FIRST_READMARKER_PAGE - 2) * 4096UL,
PROT_NONE);
struct stm_priv_segment_info_s *pr = get_priv_segment(i);
@@ -64,17 +55,16 @@
pr->write_lock_num = i + 1;
pr->pub.segment_num = i;
pr->pub.segment_base = segment_base;
- pr->old_objects_pointing_to_young = list_create();
- pr->modified_objects = list_create();
- pr->creation_markers = list_create();
+ pr->overflow_objects_pointing_to_nursery = NULL;
+ pr->modified_old_objects = list_create();
+ pr->overflow_number = GCFLAG_OVERFLOW_NUMBER_bit0 * (i + 1);
+ highest_overflow_number = pr->overflow_number;
}
- /* Make the nursery pages shared. The other pages are
- shared lazily, as remap_file_pages() takes a relatively
- long time for each page. */
- pages_initialize_shared(FIRST_NURSERY_PAGE, NB_NURSERY_PAGES);
+ /* The pages are shared lazily, as remap_file_pages() takes a relatively
+ long time for each page.
- /* The read markers are initially zero, which is correct:
+ The read markers are initially zero, which is correct:
STM_SEGMENT->transaction_read_version never contains zero,
so a null read marker means "not read" whatever the
current transaction_read_version is.
@@ -96,9 +86,8 @@
long i;
for (i = 0; i < NB_SEGMENTS; i++) {
struct stm_priv_segment_info_s *pr = get_priv_segment(i);
- list_free(pr->old_objects_pointing_to_young);
- list_free(pr->modified_objects);
- list_free(pr->creation_markers);
+ assert(pr->overflow_objects_pointing_to_nursery == NULL);
+ list_free(pr->modified_old_objects);
}
munmap(stm_object_pages, TOTAL_MEMORY);
@@ -115,15 +104,15 @@
void stm_register_thread_local(stm_thread_local_t *tl)
{
int num;
- if (stm_thread_locals == NULL) {
- stm_thread_locals = tl->next = tl->prev = tl;
+ if (stm_all_thread_locals == NULL) {
+ stm_all_thread_locals = tl->next = tl->prev = tl;
num = 0;
}
else {
- tl->next = stm_thread_locals;
- tl->prev = stm_thread_locals->prev;
- stm_thread_locals->prev->next = tl;
- stm_thread_locals->prev = tl;
+ tl->next = stm_all_thread_locals;
+ tl->prev = stm_all_thread_locals->prev;
+ stm_all_thread_locals->prev->next = tl;
+ stm_all_thread_locals->prev = tl;
num = tl->prev->associated_segment_num + 1;
}
@@ -137,10 +126,11 @@
void stm_unregister_thread_local(stm_thread_local_t *tl)
{
- if (tl == stm_thread_locals) {
- stm_thread_locals = stm_thread_locals->next;
- if (tl == stm_thread_locals) {
- stm_thread_locals = NULL;
+ assert(tl->next != NULL);
+ if (tl == stm_all_thread_locals) {
+ stm_all_thread_locals = stm_all_thread_locals->next;
+ if (tl == stm_all_thread_locals) {
+ stm_all_thread_locals = NULL;
return;
}
}
diff --git a/c7/stm/sync.c b/c7/stm/sync.c
--- a/c7/stm/sync.c
+++ b/c7/stm/sync.c
@@ -191,7 +191,6 @@
assert(STM_PSEGMENT->safe_point == SP_SAFE_POINT_CAN_COLLECT);
STM_PSEGMENT->safe_point = SP_RUNNING;
- restore_nursery_section_end(NSE_SIGNAL_DONE);
if (STM_PSEGMENT->transaction_state == TS_MUST_ABORT)
stm_abort_transaction();
}
@@ -219,6 +218,8 @@
try_wait_for_other_safe_points() while another is currently blocked
in the cond_wait() in this same function.
*/
+ abort();//...
+#if 0
assert(_has_mutex());
assert(STM_PSEGMENT->safe_point == SP_SAFE_POINT_CAN_COLLECT);
@@ -265,36 +266,28 @@
cond_broadcast(); /* to wake up the other threads, but later,
when they get the mutex again */
return true;
+#endif
}
-bool _stm_collectable_safe_point(void)
+void _stm_collectable_safe_point(void)
{
- bool any_operation = false;
- restart:;
- switch (STM_SEGMENT->v_nursery_section_end) {
+ /* If nursery_section_end was set to NSE_SIGNAL by another thread,
+ we end up here as soon as we try to call stm_allocate() or do
+ a call to stm_safe_point().
+ See try_wait_for_other_safe_points() for details.
+ */
+ mutex_lock();
+ assert(STM_PSEGMENT->safe_point == SP_RUNNING);
- case NSE_SIGNAL:
- /* If nursery_section_end was set to NSE_SIGNAL by another thread,
- we end up here as soon as we try to call stm_allocate().
- See try_wait_for_other_safe_points() for details. */
- mutex_lock();
- assert(STM_PSEGMENT->safe_point == SP_RUNNING);
+ if (_stm_nursery_end == NSE_SIGNAL) {
STM_PSEGMENT->safe_point = SP_SAFE_POINT_CAN_COLLECT;
+
cond_broadcast();
- cond_wait();
+
+ do { cond_wait(); } while (_stm_nursery_end == NSE_SIGNAL);
+
STM_PSEGMENT->safe_point = SP_RUNNING;
- mutex_unlock();
+ }
- /* Once the sync point is done, retry. */
- any_operation = true;
- goto restart;
-
- case NSE_SIGNAL_DONE:
- restore_nursery_section_end(NSE_SIGNAL_DONE);
- any_operation = true;
- break;
-
- default:;
- }
- return any_operation;
+ mutex_unlock();
}
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -12,21 +12,12 @@
#include <stdbool.h>
#include <assert.h>
#include <limits.h>
-#include <endian.h>
#include <unistd.h>
#if LONG_MAX == 2147483647
# error "Requires a 64-bit environment"
#endif
-#if BYTE_ORDER == 1234
-# define LENDIAN 1 // little endian
-#elif BYTE_ORDER == 4321
-# define LENDIAN 0 // big endian
-#else
-# error "Unsupported endianness"
-#endif
-
#define TLPREFIX __attribute__((address_space(256)))
@@ -42,27 +33,16 @@
We assume that objects are at least 16 bytes long, and use
their address divided by 16. The read marker is equal to
'STM_SEGMENT->transaction_read_version' if and only if the
- object was read in the current transaction. */
+ object was read in the current transaction. The nurseries
+ also have corresponding read markers, but they are never used. */
uint8_t rm;
};
-struct stm_creation_marker_s {
- /* In addition to read markers, every "line" of 256 bytes has one
- extra byte, the creation marker, located at the address divided
- by 256. The creation marker is either non-zero if all objects in
- this line come have been allocated by the current transaction,
- or 0x00 if none of them have been. Lines cannot contain a
- mixture of both. Non-zero values are 0xff if in the nursery,
- and 0x01 if outside the nursery. */
- uint8_t cm;
-};
-
struct stm_segment_info_s {
uint8_t transaction_read_version;
int segment_num;
char *segment_base;
stm_char *nursery_current;
- uintptr_t v_nursery_section_end; /* see nursery.h */
struct stm_thread_local_s *running_thread;
stm_jmpbuf_t *jmpbuf_ptr;
};
@@ -79,10 +59,13 @@
/* this should use llvm's coldcc calling convention,
but it's not exposed to C code so far */
void _stm_write_slowpath(object_t *);
-stm_char *_stm_allocate_slowpath(ssize_t);
+object_t *_stm_allocate_slowpath(ssize_t);
+object_t *_stm_allocate_external(ssize_t);
void _stm_become_inevitable(char*);
void _stm_start_transaction(stm_thread_local_t *, stm_jmpbuf_t *);
-bool _stm_collectable_safe_point(void);
+void _stm_collectable_safe_point(void);
+
+extern uintptr_t _stm_nursery_end;
#ifdef STM_TESTS
bool _stm_was_read(object_t *obj);
@@ -98,12 +81,13 @@
void _stm_start_safe_point(void);
void _stm_stop_safe_point(void);
void _stm_set_nursery_free_count(uint64_t free_count);
-object_t *_stm_enum_old_objects_pointing_to_young(void);
-object_t *_stm_enum_modified_objects(void);
+object_t *_stm_enum_overflow_objects_pointing_to_nursery(void);
+object_t *_stm_enum_modified_old_objects(void);
#endif
-#define _STM_GCFLAG_WRITE_BARRIER_CALLED 0x80
-#define _STM_NSE_SIGNAL 1
+#define _STM_GCFLAG_WRITE_BARRIER 0x01
+#define _STM_NSE_SIGNAL 0
+#define _STM_FAST_ALLOC (66*1024)
#define STM_FLAGS_PREBUILT 0
@@ -133,7 +117,7 @@
*/
struct object_s {
- uint8_t stm_flags; /* reserved for the STM library */
+ uint32_t stm_flags; /* reserved for the STM library */
};
/* The read barrier must be called whenever the object 'obj' is read.
@@ -142,33 +126,24 @@
transaction commit, nothing that can potentially collect or do a safe
point (like stm_write() on a different object). Also, if we might
have finished the transaction and started the next one, then
- stm_read() needs to be called again.
+ stm_read() needs to be called again. It can be omitted if
+ stm_write() is called, or immediately after getting the object from
+ stm_allocate(), as long as the rules above are respected.
*/
static inline void stm_read(object_t *obj)
{
-#if 0 /* very costly check */
- assert(((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm
- <= STM_SEGMENT->transaction_read_version);
-#endif
((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm =
STM_SEGMENT->transaction_read_version;
}
/* The write barrier must be called *before* doing any change to the
object 'obj'. If we might have finished the transaction and started
- the next one, then stm_write() needs to be called again.
- If stm_write() is called, it is not necessary to also call stm_read()
- on the same object.
+ the next one, then stm_write() needs to be called again. It is not
+ necessary to call it immediately after stm_allocate().
*/
static inline void stm_write(object_t *obj)
{
- /* this is:
- 'if (cm < 0x80 && (stm_flags & WRITE_BARRIER_CALLED) == 0)'
- where 'cm' can be 0 (not created in current transaction)
- or 0xff (created in current transaction)
- or 0x01 (same, but outside the nursery) */
- if (UNLIKELY(!((((stm_creation_marker_t *)(((uintptr_t)obj) >> 8))->cm |
- obj->stm_flags) & _STM_GCFLAG_WRITE_BARRIER_CALLED)))
+ if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0))
_stm_write_slowpath(obj);
}
@@ -190,11 +165,15 @@
OPT_ASSERT(size_rounded_up >= 16);
OPT_ASSERT((size_rounded_up & 7) == 0);
+ if (UNLIKELY(size_rounded_up >= _STM_FAST_ALLOC))
+ return _stm_allocate_external(size_rounded_up);
+
stm_char *p = STM_SEGMENT->nursery_current;
stm_char *end = p + size_rounded_up;
STM_SEGMENT->nursery_current = end;
- if (UNLIKELY((uintptr_t)end > STM_SEGMENT->v_nursery_section_end))
- p = _stm_allocate_slowpath(size_rounded_up);
+ if (UNLIKELY((uintptr_t)end > _stm_nursery_end))
+ return _stm_allocate_slowpath(size_rounded_up);
+
return (object_t *)p;
}
@@ -250,7 +229,7 @@
/* Forces a safe-point if needed. Normally not needed: this is
automatic if you call stm_allocate(). */
static inline void stm_safe_point(void) {
- if (STM_SEGMENT->v_nursery_section_end == _STM_NSE_SIGNAL)
+ if (_stm_nursery_end == _STM_NSE_SIGNAL)
_stm_collectable_safe_point();
}
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -53,7 +53,6 @@
bool _checked_stm_write(object_t *obj);
bool _stm_was_read(object_t *obj);
bool _stm_was_written(object_t *obj);
-uint8_t _stm_creation_marker(object_t *obj);
bool _stm_in_nursery(object_t *obj);
char *_stm_real_address(object_t *obj);
object_t *_stm_segment_address(char *ptr);
@@ -77,8 +76,8 @@
ssize_t stmcb_size_rounded_up(struct object_s *obj);
-object_t *_stm_enum_old_objects_pointing_to_young(void);
-object_t *_stm_enum_modified_objects(void);
+object_t *_stm_enum_overflow_objects_pointing_to_nursery(void);
+object_t *_stm_enum_modified_old_objects(void);
void stm_collect(long level);
""")
@@ -248,7 +247,7 @@
('STM_DEBUGPRINT', '1')],
undef_macros=['NDEBUG'],
include_dirs=[parent_dir],
- extra_compile_args=['-g', '-O0', '-Werror'],
+ extra_compile_args=['-g', '-O0', '-Werror', '-ferror-limit=1'],
force_generic_engine=True)
More information about the pypy-commit
mailing list