[pypy-commit] stmgc default: WIP
Raemi
noreply at buildbot.pypy.org
Wed Sep 3 10:02:26 CEST 2014
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch:
Changeset: r1332:e250803b835e
Date: 2014-09-03 09:54 +0200
http://bitbucket.org/pypy/stmgc/changeset/e250803b835e/
Log: WIP
diff too long, truncating to 2000 out of 2893 lines
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -8,3 +8,4 @@
*/__pycache__
*.out.*
*/\#*\#
+*/.\#*
diff --git a/c8/stm/atomic.h b/c8/stm/atomic.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/atomic.h
@@ -0,0 +1,47 @@
+#ifndef _STM_ATOMIC_H
+#define _STM_ATOMIC_H
+
+/* spin_loop() corresponds to the PAUSE instruction on x86. On
+ other architectures, we generate no instruction (but still need
+ the compiler barrier); if on another architecture you find the
+ corresponding instruction, feel free to add it here.
+*/
+
+/* write_fence() is a function that inserts a "write fence". The
+ goal is to make sure that past writes are really pushed to memory
+ before the future writes. We assume that the corresponding "read
+ fence" effect is done automatically by a corresponding
+ __sync_bool_compare_and_swap().
+
+ On x86, this is done automatically by the CPU; we only need a
+ compiler barrier (asm("memory")).
+
+ On other architectures, we use __sync_synchronize() as a general
+ fall-back, but we might have a more efficient alternative on some
+ platforms too.
+*/
+
+
+#if defined(__i386__) || defined(__amd64__)
+
+# define HAVE_FULL_EXCHANGE_INSN
+ static inline void spin_loop(void) { asm("pause" : : : "memory"); }
+ static inline void write_fence(void) { asm("" : : : "memory"); }
+
+#else
+
+ static inline void spin_loop(void) { asm("" : : : "memory"); }
+ static inline void write_fence(void) { __sync_synchronize(); }
+
+#endif
+
+
+#define spinlock_acquire(lock) \
+ do { if (LIKELY(__sync_lock_test_and_set(&(lock), 1) == 0)) break; \
+ spin_loop(); } while (1)
+#define spinlock_release(lock) \
+ do { assert((lock) == 1); \
+ __sync_lock_release(&(lock)); } while (0)
+
+
+#endif /* _STM_ATOMIC_H */
diff --git a/c8/stm/core.c b/c8/stm/core.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/core.c
@@ -0,0 +1,101 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+/* Slow path of the STM write barrier, reached when the object still
+   has GCFLAG_WRITE_BARRIER set.  WIP: the body is still an empty stub
+   in this commit — no object is recorded anywhere yet. */
+void _stm_write_slowpath(object_t *obj)
+{
+
+}
+
+/* Reset all read markers of the current segment back to 0 and restart
+   the per-segment version counter at 1.  Called when the 8-bit
+   transaction_read_version wraps around (see _stm_start_transaction). */
+static void reset_transaction_read_version(void)
+{
+ /* force-reset all read markers to 0 */
+
+ char *readmarkers = REAL_ADDRESS(STM_SEGMENT->segment_base,
+ FIRST_READMARKER_PAGE * 4096UL);
+ dprintf(("reset_transaction_read_version: %p %ld\n", readmarkers,
+ (long)(NB_READMARKER_PAGES * 4096UL)));
+ /* remapping fresh anonymous pages over the range is cheaper than
+    memset for large areas; on failure fall back to memset (or die
+    loudly when running the test suite) */
+ if (mmap(readmarkers, NB_READMARKER_PAGES * 4096UL,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PAGES_FLAGS, -1, 0) != readmarkers) {
+ /* fall-back */
+#if STM_TESTS
+ stm_fatalerror("reset_transaction_read_version: %m");
+#endif
+ memset(readmarkers, 0, NB_READMARKER_PAGES * 4096UL);
+ }
+ STM_SEGMENT->transaction_read_version = 1;
+}
+
+
+/* Internal transaction start.  Must be called with the global mutex
+   held (the public wrapper stm_start_transaction() acquires it); this
+   function releases it via s_mutex_unlock() once a segment is bound.
+   NOTE(review): the 'inevitable' parameter is not used anywhere in
+   this WIP version. */
+static void _stm_start_transaction(stm_thread_local_t *tl, bool inevitable)
+{
+ assert(!_stm_in_transaction(tl));
+
+ retry:
+
+ if (!acquire_thread_segment(tl))
+ goto retry;
+ /* GS invalid before this point! */
+
+#ifndef NDEBUG
+ STM_PSEGMENT->running_pthread = pthread_self();
+#endif
+
+ dprintf(("start_transaction\n"));
+
+ s_mutex_unlock();
+
+ /* bump the 8-bit read version; on wrap-around (0xff -> 0) all read
+    markers must be wiped so stale markers cannot alias the new value */
+ uint8_t old_rv = STM_SEGMENT->transaction_read_version;
+ STM_SEGMENT->transaction_read_version = old_rv + 1;
+ if (UNLIKELY(old_rv == 0xff)) {
+ reset_transaction_read_version();
+ }
+
+ assert(list_is_empty(STM_PSEGMENT->modified_old_objects));
+ check_nursery_at_transaction_start();
+}
+
+/* Public entry point: start a transaction on 'tl'.  Returns the number
+   of times a longjmp() has already come back to this point (0 on the
+   first attempt), as reported by stm_rewind_jmp_setjmp(). */
+long stm_start_transaction(stm_thread_local_t *tl)
+{
+ s_mutex_lock();
+#ifdef STM_NO_AUTOMATIC_SETJMP
+ long repeat_count = 0; /* test/support.py */
+#else
+ long repeat_count = stm_rewind_jmp_setjmp(tl);
+#endif
+ _stm_start_transaction(tl, false);
+ return repeat_count;
+}
+
+
+/************************************************************/
+
+/* Common tail of commit/abort: give the segment back to the pool.
+   After release_thread_segment() the %gs-based segment macros are no
+   longer valid for this thread. */
+static void _finish_transaction()
+{
+ stm_thread_local_t *tl = STM_SEGMENT->running_thread;
+ release_thread_segment(tl);
+ /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */
+}
+
+/* Commit the current transaction.  WIP: after the minor collection the
+   function calls abort(), so everything below that line is currently
+   unreachable dead code sketching the intended commit sequence. */
+void stm_commit_transaction(void)
+{
+ assert(!_has_mutex());
+ assert(STM_PSEGMENT->running_pthread == pthread_self());
+
+ minor_collection(1);
+ abort();
+
+ s_mutex_lock();
+
+ assert(STM_SEGMENT->nursery_end == NURSERY_END);
+ stm_rewind_jmp_forget(STM_SEGMENT->running_thread);
+
+
+ /* done */
+ _finish_transaction();
+ /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */
+
+ s_mutex_unlock();
+}
diff --git a/c8/stm/core.h b/c8/stm/core.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/core.h
@@ -0,0 +1,97 @@
+#define _STM_CORE_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <pthread.h>
+
+/************************************************************/
+
+#ifndef STM_GC_NURSERY
+# define STM_GC_NURSERY 4096 // 4MB
+#endif
+
+
+#define NB_PAGES (2500*256) // 2500MB
+#define NB_SEGMENTS STM_NB_SEGMENTS
+#define NB_SEGMENTS_MAX 240 /* don't increase NB_SEGMENTS past this */
+#define MAP_PAGES_FLAGS (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE)
+#define NB_NURSERY_PAGES (STM_GC_NURSERY/4)
+
+#define TOTAL_MEMORY (NB_PAGES * 4096UL * (1 + NB_SEGMENTS))
+#define READMARKER_END ((NB_PAGES * 4096UL) >> 4)
+#define FIRST_OBJECT_PAGE ((READMARKER_END + 4095) / 4096UL)
+#define FIRST_NURSERY_PAGE FIRST_OBJECT_PAGE
+#define END_NURSERY_PAGE (FIRST_NURSERY_PAGE + NB_NURSERY_PAGES)
+
+#define READMARKER_START ((FIRST_OBJECT_PAGE * 4096UL) >> 4)
+#define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
+#define OLD_RM_START ((END_NURSERY_PAGE * 4096UL) >> 4)
+#define FIRST_OLD_RM_PAGE (OLD_RM_START / 4096UL)
+#define NB_READMARKER_PAGES (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
+
+
+#define USE_REMAP_FILE_PAGES 1
+
+enum /* stm_flags */ {
+ GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
+};
+
+
+/************************************************************/
+
+
+#define STM_PSEGMENT ((stm_priv_segment_info_t *)STM_SEGMENT)
+
+typedef TLPREFIX struct stm_priv_segment_info_s stm_priv_segment_info_t;
+
+struct stm_priv_segment_info_s {
+ struct stm_segment_info_s pub;
+
+ struct list_s *modified_old_objects;
+
+ /* For debugging */
+#ifndef NDEBUG
+ pthread_t running_pthread;
+#endif
+};
+
+
+static char *stm_object_pages;
+static int stm_object_pages_fd;
+static stm_thread_local_t *stm_all_thread_locals = NULL;
+
+
+#define REAL_ADDRESS(segment_base, src) ((segment_base) + (uintptr_t)(src))
+
+
+static inline char *get_segment_base(long segment_num) {
+ return stm_object_pages + segment_num * (NB_PAGES * 4096UL);
+}
+
+static inline
+struct stm_segment_info_s *get_segment(long segment_num) {
+ return (struct stm_segment_info_s *)REAL_ADDRESS(
+ get_segment_base(segment_num), STM_PSEGMENT);
+}
+
+static inline
+struct stm_priv_segment_info_s *get_priv_segment(long segment_num) {
+ return (struct stm_priv_segment_info_s *)REAL_ADDRESS(
+ get_segment_base(segment_num), STM_PSEGMENT);
+}
+
+static bool _is_tl_registered(stm_thread_local_t *tl);
+static bool _seems_to_be_running_transaction(void);
+
+
+static inline void _duck(void) {
+ /* put a call to _duck() between two instructions that set 0 into
+ a %gs-prefixed address and that may otherwise be replaced with
+ llvm.memset --- it fails later because of the prefix...
+ This is not needed any more after applying the patch
+ llvmfix/no-memset-creation-with-addrspace.diff. */
+ asm("/* workaround for llvm bug */");
+}
diff --git a/c8/stm/fprintcolor.c b/c8/stm/fprintcolor.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/fprintcolor.c
@@ -0,0 +1,48 @@
+/* ------------------------------------------------------------ */
+#ifdef STM_DEBUGPRINT
+/* ------------------------------------------------------------ */
+
+
+/* printf to stderr with a per-segment ANSI color prefix and a
+   "[pid,tid]" tag, so interleaved output from several threads can be
+   told apart.  Returns the vsnprintf() result.
+   NOTE(review): vsnprintf returns the *would-be* length; if the
+   formatted text is truncated at 2000 bytes, 'result' can exceed the
+   written size and the strcpy below would write past the truncation
+   point — confirm callers never format that much. */
+static int threadcolor_printf(const char *format, ...)
+{
+ char buffer[2048];
+ va_list ap;
+ int result;
+ int size = (int)sprintf(buffer, "\033[%dm[%d,%lx] ", dprintfcolor(),
+ (int)getpid(), (long)pthread_self());
+ assert(size >= 0);
+
+ va_start(ap, format);
+ result = vsnprintf(buffer + size, 2000, format, ap);
+ assert(result >= 0);
+ va_end(ap);
+
+ strcpy(buffer + size + result, "\033[0m");
+ fputs(buffer, stderr);
+
+ return result;
+}
+
+
+/* ------------------------------------------------------------ */
+#endif
+/* ------------------------------------------------------------ */
+
+
+/* Print a fatal-error banner plus the printf-style message to stderr,
+   then abort().  Never returns (declared noreturn in fprintcolor.h). */
+static void stm_fatalerror(const char *format, ...)
+{
+ va_list ap;
+
+#ifdef STM_DEBUGPRINT
+ dprintf(("STM Subsystem: Fatal Error\n"));
+#else
+ fprintf(stderr, "STM Subsystem: Fatal Error\n");
+#endif
+
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+
+ abort();
+}
diff --git a/c8/stm/fprintcolor.h b/c8/stm/fprintcolor.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/fprintcolor.h
@@ -0,0 +1,41 @@
+/* ------------------------------------------------------------ */
+#ifdef STM_DEBUGPRINT
+/* ------------------------------------------------------------ */
+
+
+#include <stdarg.h>
+
+
+#define dprintf(args) threadcolor_printf args
+static inline int dprintfcolor(void)
+{
+ return 31 + (STM_SEGMENT->segment_num + 5) % 6;
+}
+
+static int threadcolor_printf(const char *format, ...)
+ __attribute__((format (printf, 1, 2)));
+
+#ifdef STM_TESTS
+# define dprintf_test(args) dprintf(args)
+#else
+# define dprintf_test(args) do { } while(0)
+#endif
+
+
+/* ------------------------------------------------------------ */
+#else
+/* ------------------------------------------------------------ */
+
+
+#define dprintf(args) do { } while(0)
+#define dprintf_test(args) do { } while(0)
+#define dprintfcolor() 0
+
+
+/* ------------------------------------------------------------ */
+#endif
+/* ------------------------------------------------------------ */
+
+
+static void stm_fatalerror(const char *format, ...)
+ __attribute__((format (printf, 1, 2), noreturn));
diff --git a/c8/stm/list.c b/c8/stm/list.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/list.c
@@ -0,0 +1,180 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+#define LIST_SETSIZE(n) (sizeof(struct list_s) + LIST_ITEMSSIZE(n))
+#define LIST_ITEMSSIZE(n) ((n) * sizeof(uintptr_t))
+#define LIST_OVERCNT(n) (33 + ((((n) / 2) * 3) | 1))
+
+/* Allocate a fresh growable list with room for 32 items.
+   last_allocated holds the highest usable index (capacity - 1). */
+static struct list_s *list_create(void)
+{
+ uintptr_t initial_allocation = 32;
+ struct list_s *lst = malloc(LIST_SETSIZE(initial_allocation));
+ if (lst == NULL)
+ stm_fatalerror("out of memory in list_create"); /* XXX */
+
+ lst->count = 0;
+ lst->last_allocated = initial_allocation - 1;
+ return lst;
+}
+
+/* Grow 'lst' so that index 'nalloc' fits, over-allocating by ~1.5x
+   (see LIST_OVERCNT).  Returns the possibly-moved list pointer; the
+   caller must use the return value, not the old pointer. */
+static struct list_s *_list_grow(struct list_s *lst, uintptr_t nalloc)
+{
+ nalloc = LIST_OVERCNT(nalloc);
+ lst = realloc(lst, LIST_SETSIZE(nalloc));
+ if (lst == NULL)
+ stm_fatalerror("out of memory in _list_grow"); /* XXX */
+
+ lst->last_allocated = nalloc - 1;
+ return lst;
+}
+
+
+/************************************************************/
+
+static void _tree_clear_node(wlog_node_t *node)
+{
+ memset(node, 0, sizeof(wlog_node_t));
+}
+
+static void tree_clear(struct tree_s *tree)
+{
+ if (tree->raw_current != tree->raw_start) {
+ _tree_clear_node(&tree->toplevel);
+ tree->raw_current = tree->raw_start;
+ }
+}
+
+static struct tree_s *tree_create(void)
+{
+ return (struct tree_s *)calloc(1, sizeof(struct tree_s));
+}
+
+static void tree_free(struct tree_s *tree)
+{
+ free(tree->raw_start);
+ assert(memset(tree, 0xDD, sizeof(struct tree_s)));
+ free(tree);
+}
+
+static void _tree_compress(struct tree_s *tree)
+{
+ wlog_t *item;
+ struct tree_s tree_copy;
+ memset(&tree_copy, 0, sizeof(struct tree_s));
+
+ TREE_LOOP_FORWARD(*tree, item) {
+ tree_insert(&tree_copy, item->addr, item->val);
+
+ } TREE_LOOP_END;
+
+ free(tree->raw_start);
+ *tree = tree_copy;
+}
+
+static wlog_t *_tree_find(char *entry, uintptr_t addr)
+{
+ uintptr_t key = TREE_HASH(addr);
+ while (((long)entry) & 1) {
+ /* points to a further level */
+ key >>= TREE_BITS;
+ entry = *(char **)((entry - 1) + (key & TREE_MASK));
+ }
+ return (wlog_t *)entry; /* may be NULL */
+}
+
+static void _tree_grow(struct tree_s *tree, long extra)
+{
+ struct tree_s newtree;
+ wlog_t *item;
+ long alloc = tree->raw_end - tree->raw_start;
+ long newalloc = (alloc + extra + (alloc >> 2) + 31) & ~15;
+ //fprintf(stderr, "growth: %ld\n", newalloc);
+ char *newitems = malloc(newalloc);
+ if (newitems == NULL) {
+ stm_fatalerror("out of memory!"); /* XXX */
+ }
+ newtree.raw_start = newitems;
+ newtree.raw_current = newitems;
+ newtree.raw_end = newitems + newalloc;
+ _tree_clear_node(&newtree.toplevel);
+ TREE_LOOP_FORWARD(*tree, item)
+ {
+ tree_insert(&newtree, item->addr, item->val);
+ } TREE_LOOP_END;
+ free(tree->raw_start);
+ *tree = newtree;
+}
+
+static char *_tree_grab(struct tree_s *tree, long size)
+{
+ char *result;
+ result = tree->raw_current;
+ tree->raw_current += size;
+ if (tree->raw_current > tree->raw_end) {
+ _tree_grow(tree, size);
+ return NULL;
+ }
+ return result;
+}
+
+/* Insert (addr, val) into the hash-trie.  Descends TREE_BITS of the
+   hashed key per level; leaf entries are wlog_t, inner nodes are
+   wlog_node_t pointers tagged with bit 0 set.  If _tree_grab() runs
+   out of raw space it rebuilds the whole tree (_tree_grow), which
+   invalidates 'p' — hence the full restart via 'goto retry'. */
+static void tree_insert(struct tree_s *tree, uintptr_t addr, uintptr_t val)
+{
+ assert(addr != 0); /* the NULL key is reserved */
+ retry:;
+ wlog_t *wlog;
+ uintptr_t key = TREE_HASH(addr);
+ int shift = 0;
+ char *p = (char *)(tree->toplevel.items);
+ char *entry;
+ while (1) {
+ assert(shift < TREE_DEPTH_MAX * TREE_BITS);
+ p += (key >> shift) & TREE_MASK;
+ shift += TREE_BITS;
+ entry = *(char **)p;
+ if (entry == NULL)
+ break;
+ else if (((long)entry) & 1) {
+ /* points to a further level */
+ p = entry - 1;
+ }
+ else {
+ wlog_t *wlog1 = (wlog_t *)entry;
+ if (wlog1->addr == 0) {
+ /* reuse the deleted entry and that's it */
+ wlog1->addr = addr;
+ wlog1->val = val;
+ return;
+ }
+ /* the key must not already be present */
+ assert(wlog1->addr != addr);
+ /* collision: there is already a different wlog here */
+ wlog_node_t *node = (wlog_node_t *)
+ _tree_grab(tree, sizeof(wlog_node_t));
+ if (node == NULL) goto retry;
+ _tree_clear_node(node);
+ /* push the existing leaf one level down, then continue the
+    descent inside the new node */
+ uintptr_t key1 = TREE_HASH(wlog1->addr);
+ char *p1 = (char *)(node->items);
+ *(wlog_t **)(p1 + ((key1 >> shift) & TREE_MASK)) = wlog1;
+ *(char **)p = ((char *)node) + 1;
+ p = p1;
+ }
+ }
+ wlog = (wlog_t *)_tree_grab(tree, sizeof(wlog_t));
+ if (wlog == NULL) goto retry;
+ wlog->addr = addr;
+ wlog->val = val;
+ *(char **)p = (char *)wlog;
+}
+
+/* Mark the entry for 'addr' as deleted (addr = 0, the reserved key).
+   The slot itself stays in the trie for later reuse by tree_insert.
+   Returns false if 'addr' was not present. */
+static bool tree_delete_item(struct tree_s *tree, uintptr_t addr)
+{
+ wlog_t *entry;
+ TREE_FIND(*tree, addr, entry, goto missing);
+ entry->addr = 0;
+ return true;
+
+ missing:
+ return false;
+}
diff --git a/c8/stm/list.h b/c8/stm/list.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/list.h
@@ -0,0 +1,217 @@
+#include <stdlib.h>
+#include <stdbool.h>
+
+/************************************************************/
+
+struct list_s {
+ uintptr_t count;
+ uintptr_t last_allocated;
+ uintptr_t items[];
+};
+
+static struct list_s *list_create(void) __attribute__((unused));
+
+static inline void list_free(struct list_s *lst)
+{
+ free(lst);
+}
+
+#define LIST_CREATE(lst) ((lst) = list_create())
+#define LIST_FREE(lst) (list_free(lst), (lst) = NULL)
+
+
+static struct list_s *_list_grow(struct list_s *, uintptr_t);
+
+/* Append one item.  May realloc: the returned pointer replaces 'lst'
+   (the LIST_APPEND macro below rebinds the variable for you). */
+static inline struct list_s *list_append(struct list_s *lst, uintptr_t item)
+{
+ uintptr_t index = lst->count++;
+ if (UNLIKELY(index > lst->last_allocated))
+ lst = _list_grow(lst, index);
+ lst->items[index] = item;
+ return lst;
+}
+
+#define LIST_APPEND(lst, e) ((lst) = list_append((lst), (uintptr_t)(e)))
+
+/* Append two items at once.  Uses '>=' (not '>') because two slots,
+   index and index+1, must fit below last_allocated. */
+static inline struct list_s *list_append2(struct list_s *lst,
+ uintptr_t item0, uintptr_t item1)
+{
+ uintptr_t index = lst->count;
+ lst->count += 2;
+ if (UNLIKELY(index >= lst->last_allocated))
+ lst = _list_grow(lst, index + 1);
+ lst->items[index + 0] = item0;
+ lst->items[index + 1] = item1;
+ return lst;
+}
+
+
+static inline void list_clear(struct list_s *lst)
+{
+ lst->count = 0;
+}
+
+static inline bool list_is_empty(struct list_s *lst)
+{
+ return (lst->count == 0);
+}
+
+static inline uintptr_t list_count(struct list_s *lst)
+{
+ return lst->count;
+}
+
+static inline uintptr_t list_pop_item(struct list_s *lst)
+{
+ assert(lst->count > 0);
+ return lst->items[--lst->count];
+}
+
+static inline uintptr_t list_item(struct list_s *lst, uintptr_t index)
+{
+ return lst->items[index];
+}
+
+static inline void list_set_item(struct list_s *lst, uintptr_t index,
+ uintptr_t newitem)
+{
+ lst->items[index] = newitem;
+}
+
+static inline uintptr_t *list_ptr_to_item(struct list_s *lst, uintptr_t index)
+{
+ return &lst->items[index];
+}
+
+#define LIST_FOREACH_R(lst, TYPE, CODE) \
+ do { \
+ struct list_s *_lst = (lst); \
+ uintptr_t _i; \
+ for (_i = _lst->count; _i--; ) { \
+ TYPE item = (TYPE)_lst->items[_i]; \
+ CODE; \
+ } \
+ } while (0)
+
+/************************************************************/
+
+/* The tree_xx functions are, like the name hints, implemented as a tree,
+ supporting very high performance in TREE_FIND in the common case where
+ there are no or few elements in the tree, but scaling correctly
+ if the number of items becomes large (logarithmically, rather
+ than almost-constant-time with hash maps, but with low constants).
+ The value 0 cannot be used as a key.
+*/
+
+#define TREE_BITS 4
+#define TREE_ARITY (1 << TREE_BITS)
+
+#define TREE_DEPTH_MAX ((sizeof(void*)*8 + TREE_BITS-1) / TREE_BITS)
+/* sizeof(void*)*8 = total number of bits
+ (x + TREE_BITS-1) / TREE_BITS = divide by TREE_BITS, rounding up
+*/
+
+#define TREE_HASH(key) ((key) ^ ((key) << 4))
+#define TREE_MASK ((TREE_ARITY - 1) * sizeof(void*))
+
+typedef struct {
+ uintptr_t addr;
+ uintptr_t val;
+} wlog_t;
+
+typedef struct {
+ char *items[TREE_ARITY];
+} wlog_node_t;
+
+struct tree_s {
+ char *raw_start, *raw_current, *raw_end;
+ wlog_node_t toplevel;
+};
+
+static struct tree_s *tree_create(void) __attribute__((unused));
+static void tree_free(struct tree_s *tree) __attribute__((unused));
+static void tree_clear(struct tree_s *tree) __attribute__((unused));
+//static inline void tree_delete_not_used_any_more(struct tree_s *tree)...
+
+static inline bool tree_is_cleared(struct tree_s *tree) {
+ return tree->raw_current == tree->raw_start;
+}
+
+#define _TREE_LOOP(tree, item, INITIAL, _PLUS_) \
+{ \
+ struct { char **next; char **end; } _stack[TREE_DEPTH_MAX], *_stackp; \
+ char **_next, **_end, *_entry; \
+ long _deleted_factor = 0; \
+ struct tree_s *_tree = &(tree); \
+ /* initialization */ \
+ _stackp = _stack; /* empty stack */ \
+ _next = _tree->toplevel.items + INITIAL; \
+ _end = _next _PLUS_ TREE_ARITY; \
+ /* loop */ \
+ while (1) \
+ { \
+ if (_next == _end) \
+ { \
+ if (_stackp == _stack) \
+ break; /* done */ \
+ /* finished with this level, go to the next one */ \
+ _stackp--; \
+ _next = _stackp->next; \
+ _end = _stackp->end; \
+ continue; \
+ } \
+ _entry = *_next; \
+ _next = _next _PLUS_ 1; \
+ if (_entry == NULL) /* empty entry */ \
+ continue; \
+ if (((long)_entry) & 1) \
+ { /* points to a further level: enter it */ \
+ assert(_stackp - _stack < TREE_DEPTH_MAX); \
+ _stackp->next = _next; \
+ _stackp->end = _end; \
+ _stackp++; \
+ _next = ((wlog_node_t *)(_entry - 1))->items + INITIAL; \
+ _end = _next _PLUS_ TREE_ARITY; \
+ continue; \
+ } \
+ /* points to a wlog_t item */ \
+ if (((wlog_t *)_entry)->addr == 0) { /* deleted entry */ \
+ _deleted_factor += 3; \
+ continue; \
+ } \
+ _deleted_factor -= 4; \
+ item = (wlog_t *)_entry;
+
+#define TREE_LOOP_FORWARD(tree, item) \
+ _TREE_LOOP(tree, item, 0, +)
+#define TREE_LOOP_BACKWARD(tree, item) \
+ _TREE_LOOP(tree, item, (TREE_ARITY-1), -)
+#define TREE_LOOP_END } }
+#define TREE_LOOP_END_AND_COMPRESS \
+ } if (_deleted_factor > 9) _tree_compress(_tree); }
+#define TREE_LOOP_DELETE(item) { (item)->addr = NULL; _deleted_factor += 6; }
+
+#define TREE_FIND(tree, addr1, result, goto_not_found) \
+{ \
+ uintptr_t _key = TREE_HASH(addr1); \
+ char *_p = (char *)((tree).toplevel.items); \
+ char *_entry = *(char **)(_p + (_key & TREE_MASK)); \
+ if (_entry == NULL) \
+ goto_not_found; /* common case, hopefully */ \
+ result = _tree_find(_entry, addr1); \
+ if (result == NULL || result->addr != (addr1)) \
+ goto_not_found; \
+}
+
+static wlog_t *_tree_find(char *entry, uintptr_t addr);
+static void _tree_compress(struct tree_s *tree) __attribute__((unused));
+static void tree_insert(struct tree_s *tree, uintptr_t addr, uintptr_t val);
+static bool tree_delete_item(struct tree_s *tree, uintptr_t addr)
+ __attribute__((unused));
+
+static inline bool tree_contains(struct tree_s *tree, uintptr_t addr)
+{
+ wlog_t *result;
+ TREE_FIND(*tree, addr, result, return false);
+ return true;
+}
diff --git a/c8/stm/misc.c b/c8/stm/misc.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/misc.c
@@ -0,0 +1,61 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+char *_stm_real_address(object_t *o)
+{
+ if (o == NULL)
+ return NULL;
+
+ assert(FIRST_OBJECT_PAGE * 4096UL <= (uintptr_t)o
+ && (uintptr_t)o < NB_PAGES * 4096UL);
+ return REAL_ADDRESS(STM_SEGMENT->segment_base, o);
+}
+
+char *_stm_get_segment_base(long index)
+{
+ return get_segment_base(index);
+}
+
+struct stm_priv_segment_info_s *_stm_segment(void)
+{
+ char *info = REAL_ADDRESS(STM_SEGMENT->segment_base, STM_PSEGMENT);
+ return (struct stm_priv_segment_info_s *)info;
+}
+
+stm_thread_local_t *_stm_thread(void)
+{
+ return STM_SEGMENT->running_thread;
+}
+
+bool _stm_was_read(object_t *obj)
+{
+ uint8_t rm = ((struct stm_read_marker_s *)
+ (STM_SEGMENT->segment_base + (((uintptr_t)obj) >> 4)))->rm;
+ assert(rm <= STM_SEGMENT->transaction_read_version);
+ return rm == STM_SEGMENT->transaction_read_version;
+}
+
+bool _stm_was_written(object_t *obj)
+{
+ return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0;
+}
+
+
+#ifdef STM_TESTS
+
+long _stm_count_modified_old_objects(void)
+{
+ if (STM_PSEGMENT->modified_old_objects == NULL)
+ return -1;
+ return list_count(STM_PSEGMENT->modified_old_objects);
+}
+
+
+object_t *_stm_enum_modified_old_objects(long index)
+{
+ return (object_t *)list_item(
+ STM_PSEGMENT->modified_old_objects, index);
+}
+#endif
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/nursery.c
@@ -0,0 +1,143 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+/************************************************************/
+
+#define NURSERY_START (FIRST_NURSERY_PAGE * 4096UL)
+#define NURSERY_SIZE (NB_NURSERY_PAGES * 4096UL)
+#define NURSERY_END (NURSERY_START + NURSERY_SIZE)
+
+static uintptr_t _stm_nursery_start;
+
+
+/************************************************************/
+
+static void setup_nursery(void)
+{
+ _stm_nursery_start = NURSERY_START;
+
+ long i;
+ for (i = 1; i <= NB_SEGMENTS; i++) {
+ get_segment(i)->nursery_current = (stm_char *)NURSERY_START;
+ get_segment(i)->nursery_end = NURSERY_END;
+ }
+}
+
+static inline bool _is_in_nursery(object_t *obj)
+{
+ assert((uintptr_t)obj >= NURSERY_START);
+ return (uintptr_t)obj < NURSERY_END;
+}
+
+
+/************************************************************/
+
+/* Zero out the used part of 'pseg's nursery and rewind its
+   nursery_current.  Returns the number of bytes that were in use.
+   The push_macro/undef dance makes STM_SEGMENT/STM_PSEGMENT
+   unavailable in this body, forcing every access to go through the
+   explicit 'pseg' argument (this may run on behalf of another
+   segment, not the calling thread's own). */
+static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg)
+{
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
+ dprintf(("throw_away_nursery\n"));
+ /* reset the nursery by zeroing it */
+ size_t nursery_used;
+ char *realnursery;
+
+ realnursery = REAL_ADDRESS(pseg->pub.segment_base, _stm_nursery_start);
+ nursery_used = pseg->pub.nursery_current - (stm_char *)_stm_nursery_start;
+ if (nursery_used > NB_NURSERY_PAGES * 4096) {
+ /* possible in rare cases when the program artificially advances
+ its own nursery_current */
+ nursery_used = NB_NURSERY_PAGES * 4096;
+ }
+ OPT_ASSERT((nursery_used & 7) == 0);
+ memset(realnursery, 0, nursery_used);
+
+ /* assert that the rest of the nursery still contains only zeroes */
+ assert_memset_zero(realnursery + nursery_used,
+ (NURSERY_END - _stm_nursery_start) - nursery_used);
+
+ pseg->pub.nursery_current = (stm_char *)_stm_nursery_start;
+
+
+ return nursery_used;
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
+}
+
+
+static void _do_minor_collection(bool commit)
+{
+ dprintf(("minor_collection commit=%d\n", (int)commit));
+
+ throw_away_nursery(get_priv_segment(STM_SEGMENT->segment_num));
+}
+
+static void minor_collection(bool commit)
+{
+ assert(!_has_mutex());
+
+ _do_minor_collection(commit);
+}
+
+
+/************************************************************/
+
+
+/* Slow path of nursery allocation, entered when the fast path already
+   advanced nursery_current past nursery_end.  WIP: instead of running
+   a collection and retrying, it currently abort()s (the stm_collect
+   call is commented out), so 'goto restart' is never reached. */
+object_t *_stm_allocate_slowpath(ssize_t size_rounded_up)
+{
+ /* may collect! */
+ STM_SEGMENT->nursery_current -= size_rounded_up; /* restore correct val */
+
+ restart:
+
+ OPT_ASSERT(size_rounded_up >= 16);
+ OPT_ASSERT((size_rounded_up & 7) == 0);
+
+ stm_char *p = STM_SEGMENT->nursery_current;
+ stm_char *end = p + size_rounded_up;
+ if ((uintptr_t)end <= NURSERY_END) {
+ STM_SEGMENT->nursery_current = end;
+ return (object_t *)p;
+ }
+
+ abort();//stm_collect(0);
+ goto restart;
+}
+
+#ifdef STM_TESTS
+void _stm_set_nursery_free_count(uint64_t free_count)
+{
+ assert(free_count <= NURSERY_SIZE);
+ assert((free_count & 7) == 0);
+ _stm_nursery_start = NURSERY_END - free_count;
+
+ long i;
+ for (i = 1; i <= NB_SEGMENTS; i++) {
+ if ((uintptr_t)get_segment(i)->nursery_current < _stm_nursery_start)
+ get_segment(i)->nursery_current = (stm_char *)_stm_nursery_start;
+ }
+}
+#endif
+
+/* Debug helper: assert that 'n' bytes at 's' are zero.  Compiled away
+   under NDEBUG; outside the test suite only the first 5000 bytes are
+   checked to keep it cheap.  Scans 8 bytes at a time, so the trailing
+   n % 8 bytes are not inspected. */
+static void assert_memset_zero(void *s, size_t n)
+{
+#ifndef NDEBUG
+ size_t i;
+# ifndef STM_TESTS
+ if (n > 5000) n = 5000;
+# endif
+ n /= 8;
+ for (i = 0; i < n; i++)
+ assert(((uint64_t *)s)[i] == 0);
+#endif
+}
+
+/* Sanity check at transaction start: nursery_current is back at the
+   start and the whole usable nursery range is zeroed. */
+static void check_nursery_at_transaction_start(void)
+{
+ assert((uintptr_t)STM_SEGMENT->nursery_current == _stm_nursery_start);
+ assert_memset_zero(REAL_ADDRESS(STM_SEGMENT->segment_base,
+ STM_SEGMENT->nursery_current),
+ NURSERY_END - _stm_nursery_start);
+}
diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/nursery.h
@@ -0,0 +1,5 @@
+static void minor_collection(bool commit);
+static void check_nursery_at_transaction_start(void);
+static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg);
+
+static void assert_memset_zero(void *s, size_t n);
diff --git a/c8/stm/rewind_setjmp.c b/c8/stm/rewind_setjmp.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/rewind_setjmp.c
@@ -0,0 +1,217 @@
+#include "rewind_setjmp.h"
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <alloca.h>
+
+
+struct _rewind_jmp_moved_s {
+ struct _rewind_jmp_moved_s *next;
+ size_t stack_size;
+ size_t shadowstack_size;
+};
+#define RJM_HEADER sizeof(struct _rewind_jmp_moved_s)
+
+#ifndef RJBUF_CUSTOM_MALLOC
+#define rj_malloc malloc
+#define rj_free free
+#else
+void *rj_malloc(size_t);
+void rj_free(void *);
+#endif
+
+/* XXX: currently synchronized with global mutex!! */
+
+static void copy_stack(rewind_jmp_thread *rjthread, char *base, void *ssbase)
+{
+ /* Copy away part of the stack and shadowstack. Sets moved_off_base to
+ the current frame_base.
+
+ The stack is copied between 'base' (lower limit, i.e. newest bytes)
+ and 'rjthread->head->frame_base' (upper limit, i.e. oldest bytes).
+ The shadowstack is copied between 'ssbase' (upper limit, newest)
+ and 'rjthread->head->shadowstack_base' (lower limit, oldest).
+ */
+ struct _rewind_jmp_moved_s *next;
+ char *stop;
+ void *ssstop;
+ size_t stack_size, ssstack_size;
+
+ assert(rjthread->head != NULL);
+ ssstop = rjthread->head->shadowstack_base;
+ /* bit 0 of shadowstack_base tags the 2-word JIT layout (see
+    rewind_jmp_buf in rewind_setjmp.h) */
+ if (((long)ssstop) & 1) {
+ /* PyPy's JIT: 'head->frame_base' is missing; use directly 'head',
+ which should be at the end of the frame (and doesn't need itself
+ to be copied because it contains immutable data only) */
+ ssstop = ((char *)ssstop) - 1;
+ stop = (char *)rjthread->head;
+ }
+ else {
+ stop = rjthread->head->frame_base;
+ }
+ assert(stop >= base);
+ assert(ssstop <= ssbase);
+ stack_size = stop - base;
+ ssstack_size = ssbase - ssstop;
+
+ /* one malloc'ed slice holds header + stack bytes + shadowstack bytes;
+    NOTE(review): the NULL check is an assert, so under NDEBUG an
+    out-of-memory rj_malloc would be dereferenced */
+ next = (struct _rewind_jmp_moved_s *)
+ rj_malloc(RJM_HEADER + stack_size + ssstack_size);
+ assert(next != NULL); /* XXX out of memory */
+ next->next = rjthread->moved_off;
+ next->stack_size = stack_size;
+ next->shadowstack_size = ssstack_size;
+
+ memcpy(((char *)next) + RJM_HEADER, base, stack_size);
+ memcpy(((char *)next) + RJM_HEADER + stack_size, ssstop,
+ ssstack_size);
+
+ rjthread->moved_off_base = stop;
+ rjthread->moved_off_ssbase = ssstop;
+ rjthread->moved_off = next;
+}
+
+__attribute__((noinline))
+long rewind_jmp_setjmp(rewind_jmp_thread *rjthread, void *ss)
+{
+ /* saves the current stack frame to the list of slices and
+ calls setjmp(). It returns the number of times a longjmp()
+ jumped back to this setjmp() */
+ if (rjthread->moved_off) {
+ /* old stack slices are not needed anymore (next longjmp()
+ will restore only to this setjmp()) */
+ _rewind_jmp_free_stack_slices(rjthread);
+ }
+ /* all locals of this function that need to be saved and restored
+ across the setjmp() should be stored inside this structure */
+ struct { void *ss1; rewind_jmp_thread *rjthread1; } volatile saved =
+ { ss, rjthread };
+
+ int result;
+ /* 'volatile saved' survives the longjmp; plain locals like the
+    'rjthread' argument register may not, so reload them from it
+    on both paths */
+ if (__builtin_setjmp(rjthread->jmpbuf) == 0) {
+ rjthread = saved.rjthread1;
+ rjthread->initial_head = rjthread->head;
+ result = 0;
+ }
+ else {
+ rjthread = saved.rjthread1;
+ rjthread->head = rjthread->initial_head;
+ result = rjthread->repeat_count + 1;
+ }
+ rjthread->repeat_count = result;
+
+ /* snapshot of top frame: needed every time because longjmp() frees
+ the previous one. Note that this function is called with the
+ mutex already acquired. Although it's not the job of this file,
+ we assert it is indeed acquired here. This is needed, otherwise a
+ concurrent GC may get garbage while saving shadow stack */
+#ifdef _STM_CORE_H_
+ assert(_has_mutex());
+#endif
+ copy_stack(rjthread, (char *)&saved, saved.ss1);
+
+ return result;
+}
+
+__attribute__((noinline, noreturn))
+static void do_longjmp(rewind_jmp_thread *rjthread, char *stack_free)
+{
+ /* go through list of copied stack-slices and copy them back to the
+ current stack, expanding it if necessary. The shadowstack should
+ already be restored at this point (restore_shadowstack()) */
+ assert(rjthread->moved_off_base != NULL);
+
+ while (rjthread->moved_off) {
+ struct _rewind_jmp_moved_s *p = rjthread->moved_off;
+ char *target = rjthread->moved_off_base;
+ /* CPU stack grows downwards: */
+ target -= p->stack_size;
+ if (target < stack_free) {
+ /* need more stack space!  Recurse with an alloca'd gap so the
+    callee's frame sits below 'target' and the memcpy cannot
+    overwrite our own live frame. */
+ do_longjmp(rjthread, alloca(stack_free - target));
+ abort(); /* unreachable */
+ }
+ memcpy(target, ((char *)p) + RJM_HEADER, p->stack_size);
+
+ rjthread->moved_off_base = target;
+ rjthread->moved_off = p->next;
+ rj_free(p);
+ }
+
+#ifdef _STM_CORE_H_
+ /* This function must be called with the mutex held. It will
+ remain held across the longjmp that follows and into the
+ target rewind_jmp_setjmp() function. */
+ assert(_has_mutex());
+#endif
+ __builtin_longjmp(rjthread->jmpbuf, 1);
+}
+
+__attribute__((noreturn))
+void rewind_jmp_longjmp(rewind_jmp_thread *rjthread)
+{
+ char _rewind_jmp_marker;
+ do_longjmp(rjthread, &_rewind_jmp_marker);
+}
+
+
+char *rewind_jmp_enum_shadowstack(rewind_jmp_thread *rjthread,
+ void *callback(void *, const void *, size_t))
+{
+ /* enumerate all saved shadow-stack slices */
+ struct _rewind_jmp_moved_s *p = rjthread->moved_off;
+ char *sstarget = rjthread->moved_off_ssbase;
+
+#ifdef _STM_CORE_H_
+ assert(_has_mutex());
+#endif
+
+ while (p) {
+ if (p->shadowstack_size) {
+ void *ss_slice = ((char *)p) + RJM_HEADER + p->stack_size;
+ callback(sstarget, ss_slice, p->shadowstack_size);
+
+ sstarget += p->shadowstack_size;
+ }
+ p = p->next;
+ }
+ return sstarget;
+}
+
+
+char *rewind_jmp_restore_shadowstack(rewind_jmp_thread *rjthread)
+{
+ return rewind_jmp_enum_shadowstack(rjthread, memcpy);
+}
+
+__attribute__((noinline))
+void _rewind_jmp_copy_stack_slice(rewind_jmp_thread *rjthread)
+{
+ /* called when leaving a frame. copies the now-current frame
+ to the list of stack-slices */
+#ifdef _STM_CORE_H_
+ /* A transaction should be running now. This means in particular
+ that it's not possible that a major GC runs concurrently with
+ this code (and tries to read the shadowstack slice). */
+ assert(_seems_to_be_running_transaction());
+#endif
+ if (rjthread->head == NULL) {
+ _rewind_jmp_free_stack_slices(rjthread);
+ return;
+ }
+ assert(rjthread->moved_off_base < (char *)rjthread->head);
+ copy_stack(rjthread, rjthread->moved_off_base, rjthread->moved_off_ssbase);
+}
+
+/* Free every saved stack/shadowstack slice and reset the moved_off
+   bookkeeping pointers to NULL. */
+void _rewind_jmp_free_stack_slices(rewind_jmp_thread *rjthread)
+{
+ /* frees all saved stack copies */
+ struct _rewind_jmp_moved_s *p = rjthread->moved_off;
+ while (p) {
+ struct _rewind_jmp_moved_s *pnext = p->next;
+ rj_free(p);
+ p = pnext;
+ }
+ rjthread->moved_off = NULL;
+ rjthread->moved_off_base = NULL;
+ rjthread->moved_off_ssbase = NULL;
+}
diff --git a/c8/stm/rewind_setjmp.h b/c8/stm/rewind_setjmp.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/rewind_setjmp.h
@@ -0,0 +1,117 @@
+#ifndef _REWIND_SETJMP_H_
+#define _REWIND_SETJMP_H_
+
+
+#include <stddef.h>
+
+/************************************************************
+There is a singly-linked list of frames in each thread
+rjthread->head->prev->prev->prev
+
+Another singly-linked list is the list of copied stack-slices.
+When doing a setjmp(), we copy the top-frame, free all old
+stack-slices, and link it to the top-frame->moved_off.
+When returning from the top-frame while moved_off still points
+to a slice, we also need to copy the top-frame->prev frame/slice
+and add it to this list (pointed to by moved_off).
+--------------------------------------------------------------
+
+ : : ^^^^^
+ |-------------------| older frames in the stack
+ | prev=0 |
+ ,---> | rewind_jmp_buf |
+ | |-------------------|
+ | | |
+ | : :
+ | : :
+ | | |
+ | |-------------------|
+ `---------prev |
+ ,----> | rewind_jmp_buf |
+ | +-------------------|
+ | | |
+ | : :
+ | | |
+ | |-------------------|
+ `----------prev |
+ ,---> | rewind_jmp_buf | <--------------- MOVED_OFF_BASE
+ | |---------------- +-------------+
+ | | | STACK COPY |
+ | | : :
+ | : | size |
+ | | | next | <---- MOVED_OFF
+ | | +---|------ +-------------+
+ | | | | | STACK COPY |
+ | |-------------------| | : (SEQUEL) :
+ `---------prev | | : :
+HEAD-----> | rewind_jmp_buf | | | |
+ |-------------------| | | size |
+ `------> | next=0 |
+ +-------------+
+
+
+************************************************************/
+
+typedef struct _rewind_jmp_buf {
+    char *shadowstack_base;        /* shadowstack position at frame entry */
+    struct _rewind_jmp_buf *prev;  /* next-older frame that has a rjbuf */
+    char *frame_base;              /* __builtin_frame_address(0) of the frame */
+    /* NB: PyPy's JIT has got details of this structure hard-coded,
+       as follows: it uses 2 words only (so frame_base is invalid)
+       and sets the lowest bit of 'shadowstack_base' to tell this */
+} rewind_jmp_buf;
+
+typedef struct {
+    rewind_jmp_buf *head;          /* innermost frame's rewind_jmp_buf */
+    rewind_jmp_buf *initial_head;  /* presumably the head recorded at setjmp
+                                      time -- definition not shown here */
+    char *moved_off_base;          /* frame_base of the armed frame (see
+                                      rewind_jmp_leaveframe) */
+    char *moved_off_ssbase;        /* shadowstack base matching moved_off_base */
+    struct _rewind_jmp_moved_s *moved_off;  /* list of saved stack slices */
+    void *jmpbuf[5];               /* opaque buffer for the setjmp machinery
+                                      (implementation not shown here) */
+    long repeat_count;             /* NOTE(review): used by setjmp/longjmp,
+                                      implementation not visible -- confirm */
+} rewind_jmp_thread;
+
+
+/* remember the current stack and ss_stack positions */
+#define rewind_jmp_enterframe(rjthread, rjbuf, ss) do { \
+ rewind_jmp_prepareframe(rjbuf); \
+ rewind_jmp_enterprepframe(rjthread, rjbuf, ss); \
+} while (0)
+#define rewind_jmp_prepareframe(rjbuf) \
+ ((rjbuf)->frame_base = __builtin_frame_address(0))
+#define rewind_jmp_enterprepframe(rjthread, rjbuf, ss) do { \
+ assert((((long)(ss)) & 1) == 0); \
+ (rjbuf)->shadowstack_base = (char *)(ss); \
+ (rjbuf)->prev = (rjthread)->head; \
+ (rjthread)->head = (rjbuf); \
+} while (0)
+
+/* Go up one frame.  If a setjmp was armed in the frame being left
+   (its frame_base equals the recorded moved_off_base), first copy the
+   frame to the list of saved stack slices.
+ */
+#define rewind_jmp_leaveframe(rjthread, rjbuf, ss) do { \
+ assert((rjbuf)->shadowstack_base == (char *)(ss)); \
+ (rjthread)->head = (rjbuf)->prev; \
+ if ((rjbuf)->frame_base == (rjthread)->moved_off_base) { \
+ assert((rjthread)->moved_off_ssbase == (char *)(ss));\
+ _rewind_jmp_copy_stack_slice(rjthread); \
+ } \
+} while (0)
+
+long rewind_jmp_setjmp(rewind_jmp_thread *rjthread, void *ss);
+void rewind_jmp_longjmp(rewind_jmp_thread *rjthread) __attribute__((noreturn));
+char *rewind_jmp_restore_shadowstack(rewind_jmp_thread *rjthread);
+char *rewind_jmp_enum_shadowstack(rewind_jmp_thread *rjthread,
+ void *callback(void *, const void *, size_t));
+
+#define rewind_jmp_forget(rjthread) do { \
+ if ((rjthread)->moved_off) _rewind_jmp_free_stack_slices(rjthread); \
+ (rjthread)->moved_off_base = 0; \
+ (rjthread)->moved_off_ssbase = 0; \
+} while (0)
+
+void _rewind_jmp_copy_stack_slice(rewind_jmp_thread *);
+void _rewind_jmp_free_stack_slices(rewind_jmp_thread *);
+
+#define rewind_jmp_armed(rjthread) ((rjthread)->moved_off_base != 0)
+
+#endif
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/setup.c
@@ -0,0 +1,207 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+#ifdef USE_REMAP_FILE_PAGES
+static char *setup_mmap(char *reason, int *ignored)
+{
+    /* USE_REMAP_FILE_PAGES variant: one anonymous mapping of
+       TOTAL_MEMORY bytes; no file descriptor is involved, so the
+       second argument is unused. */
+    char *result = mmap(NULL, TOTAL_MEMORY,
+                        PROT_READ | PROT_WRITE,
+                        MAP_PAGES_FLAGS, -1, 0);
+    if (result == MAP_FAILED)
+        stm_fatalerror("%s failed: %m", reason);
+
+    return result;
+}
+static void close_fd_mmap(int ignored)
+{
+    /* nothing to close in the USE_REMAP_FILE_PAGES variant */
+}
+#else
+#include <fcntl.h> /* For O_* constants */
+static char *setup_mmap(char *reason, int *map_fd)
+{
+    /* Map TOTAL_MEMORY bytes of shared memory backed by a POSIX shm
+       object.  Returns the base address; the file descriptor is
+       returned through '*map_fd'.  Any failure aborts via
+       stm_fatalerror(). */
+    char name[128];
+
+    /* BUGFIX: 'name' was previously passed to shm_open() completely
+       uninitialized (undefined behavior, garbage object name).  Build
+       a pid-based name instead; O_EXCL below still catches the
+       unlikely collision. */
+    snprintf(name, sizeof(name), "/stmgc-c8-%ld", (long)getpid());
+
+    /* Create the big shared memory object, and immediately unlink it.
+       There is a small window where if this process is killed the
+       object is left around.  It doesn't seem possible to do anything
+       about it...
+    */
+    int fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL, 0600);
+    shm_unlink(name);
+
+    if (fd == -1) {
+        /* BUGFIX: the message used to say "stm_open" */
+        stm_fatalerror("%s failed (shm_open): %m", reason);
+    }
+    if (ftruncate(fd, TOTAL_MEMORY) != 0) {
+        stm_fatalerror("%s failed (ftruncate): %m", reason);
+    }
+    /* map the object SHARED (MAP_ANONYMOUS stripped from the flags) */
+    char *result = mmap(NULL, TOTAL_MEMORY,
+                        PROT_READ | PROT_WRITE,
+                        MAP_PAGES_FLAGS & ~MAP_ANONYMOUS, fd, 0);
+    if (result == MAP_FAILED) {
+        stm_fatalerror("%s failed (mmap): %m", reason);
+    }
+    *map_fd = fd;
+    return result;
+}
+static void close_fd_mmap(int map_fd)
+{
+    /* release the file descriptor that setup_mmap() returned */
+    close(map_fd);
+}
+#endif
+
+static void setup_protection_settings(void)
+{
+    /* The segment 0 is not used to run transactions, but contains the
+       shared copy of the pages. We mprotect all pages before so that
+       accesses fail, up to and including the pages corresponding to the
+       nurseries of the other segments. */
+    mprotect(stm_object_pages, END_NURSERY_PAGE * 4096UL, PROT_NONE);
+
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        char *segment_base = get_segment_base(i);
+
+        /* In each segment, the first page is where TLPREFIX'ed
+           NULL accesses land. We mprotect it so that accesses fail. */
+        mprotect(segment_base, 4096, PROT_NONE);
+
+        /* Pages in range(2, FIRST_READMARKER_PAGE) are never used */
+        if (FIRST_READMARKER_PAGE > 2)
+            mprotect(segment_base + 8192,   /* i.e. skip pages 0 and 1 */
+                     (FIRST_READMARKER_PAGE - 2) * 4096UL,
+                     PROT_NONE);
+    }
+}
+
+void stm_setup(void)
+{
+    /* Global one-time initialization: sanity-check the compile-time
+       layout constants, map the big memory region, protect it, and
+       initialize each segment's private structures. */
+
+    /* Check that some values are acceptable */
+    assert(NB_SEGMENTS <= NB_SEGMENTS_MAX);
+    assert(4096 <= ((uintptr_t)STM_SEGMENT));
+    assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT);
+    assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192);
+    assert(2 <= FIRST_READMARKER_PAGE);
+    assert(FIRST_READMARKER_PAGE * 4096UL <= READMARKER_START);
+    assert(READMARKER_START < READMARKER_END);
+    assert(READMARKER_END <= 4096UL * FIRST_OBJECT_PAGE);
+    assert(FIRST_OBJECT_PAGE < NB_PAGES);
+    assert((NB_PAGES * 4096UL) >> 8 <= (FIRST_OBJECT_PAGE * 4096UL) >> 4);
+    assert((END_NURSERY_PAGE * 4096UL) >> 8 <=
+           (FIRST_READMARKER_PAGE * 4096UL));
+
+    stm_object_pages = setup_mmap("initial stm_object_pages mmap()",
+                                  &stm_object_pages_fd);
+    setup_protection_settings();
+
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        char *segment_base = get_segment_base(i);
+
+        /* Fill the TLS page (page 1) with 0xDC, for debugging */
+        memset(REAL_ADDRESS(segment_base, 4096), 0xDC, 4096);
+        /* Make a "hole" at STM_PSEGMENT (which includes STM_SEGMENT) */
+        memset(REAL_ADDRESS(segment_base, STM_PSEGMENT), 0,
+               sizeof(*STM_PSEGMENT));
+
+        /* Initialize STM_PSEGMENT */
+        struct stm_priv_segment_info_s *pr = get_priv_segment(i);
+        assert(1 <= i && i < 255);   /* 255 is WL_VISITED in gcpage.c */
+        pr->pub.segment_num = i;
+        pr->pub.segment_base = segment_base;
+        pr->modified_old_objects = list_create();
+        pr->pub.transaction_read_version = 0xff;
+    }
+
+    /* The pages are shared lazily, as remap_file_pages() takes a relatively
+       long time for each page.
+
+       The read markers are initially zero, but we set anyway
+       transaction_read_version to 0xff in order to force the first
+       transaction to "clear" the read markers by mapping a different,
+       private range of addresses.
+    */
+
+    setup_sync();
+    setup_nursery();
+}
+
+void stm_teardown(void)
+{
+    /* This function is called during testing, but normal programs don't
+       need to call it. */
+    /* Frees each segment's modified-objects list, unmaps the whole
+       memory region, and closes the backing fd. */
+    assert(!_has_mutex());
+
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        struct stm_priv_segment_info_s *pr = get_priv_segment(i);
+        list_free(pr->modified_old_objects);
+    }
+
+    munmap(stm_object_pages, TOTAL_MEMORY);
+    stm_object_pages = NULL;
+    close_fd_mmap(stm_object_pages_fd);
+
+    teardown_sync();
+}
+
+static pthread_t *_get_cpth(stm_thread_local_t *tl)
+{
+    /* View 'tl->creating_pthread' (an opaque void*[2] in stmgc.h) as
+       storage for a pthread_t. */
+    assert(sizeof(pthread_t) <= sizeof(tl->creating_pthread));
+    return (pthread_t *)(tl->creating_pthread);
+}
+
+void stm_register_thread_local(stm_thread_local_t *tl)
+{
+    /* Insert 'tl' into the circular doubly-linked list of all thread
+       locals and assign it a segment number. */
+    int num;
+    s_mutex_lock();
+    if (stm_all_thread_locals == NULL) {
+        /* first thread: the list is 'tl' linked to itself */
+        stm_all_thread_locals = tl->next = tl->prev = tl;
+        num = 0;
+    }
+    else {
+        /* insert 'tl' just before the list head */
+        tl->next = stm_all_thread_locals;
+        tl->prev = stm_all_thread_locals->prev;
+        stm_all_thread_locals->prev->next = tl;
+        stm_all_thread_locals->prev = tl;
+        num = tl->prev->associated_segment_num;
+    }
+
+    /* assign numbers consecutively, but that's for tests; we could also
+       assign the same number to all of them and they would get their own
+       numbers automatically. */
+    num = (num % NB_SEGMENTS) + 1;
+    tl->associated_segment_num = num;
+    *_get_cpth(tl) = pthread_self();   /* remember the creating pthread */
+    set_gs_register(get_segment_base(num));
+    s_mutex_unlock();
+}
+
+void stm_unregister_thread_local(stm_thread_local_t *tl)
+{
+    /* Remove 'tl' from the circular list.  Afterwards tl->next is
+       NULL, which is exactly what _is_tl_registered() tests. */
+    s_mutex_lock();
+    assert(tl->prev != NULL);
+    assert(tl->next != NULL);
+
+    if (tl == stm_all_thread_locals) {
+        stm_all_thread_locals = stm_all_thread_locals->next;
+        if (tl == stm_all_thread_locals) {
+            /* 'tl' was the only element */
+            stm_all_thread_locals = NULL;
+            s_mutex_unlock();
+            return;
+        }
+    }
+    tl->prev->next = tl->next;
+    tl->next->prev = tl->prev;
+    tl->prev = NULL;
+    tl->next = NULL;
+    s_mutex_unlock();
+}
+
+__attribute__((unused))
+static bool _is_tl_registered(stm_thread_local_t *tl)
+{
+    /* debug helper: registered thread locals always have a non-NULL
+       'next' (see stm_register/unregister_thread_local) */
+    return tl->next != NULL;
+}
diff --git a/c8/stm/setup.h b/c8/stm/setup.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/setup.h
@@ -0,0 +1,4 @@
+static char *setup_mmap(char *reason, int *map_fd);
+static void close_fd_mmap(int map_fd);
+static void setup_protection_settings(void);
+static pthread_t *_get_cpth(stm_thread_local_t *);
diff --git a/c8/stm/sync.c b/c8/stm/sync.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/sync.c
@@ -0,0 +1,144 @@
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <asm/prctl.h>
+
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+
+static union {
+ struct {
+ pthread_mutex_t global_mutex;
+ /* some additional pieces of global state follow */
+ uint8_t in_use1[NB_SEGMENTS]; /* 1 if running a pthread */
+ };
+ char reserved[192];
+} sync_ctl __attribute__((aligned(64)));
+
+
+static void setup_sync(void)
+{
+    /* initialize the single global mutex; aborts on failure */
+    if (pthread_mutex_init(&sync_ctl.global_mutex, NULL) != 0)
+        stm_fatalerror("mutex initialization: %m");
+}
+
+static void teardown_sync(void)
+{
+    /* destroy the global mutex and zero the whole sync state so a
+       following stm_setup() starts from a clean slate */
+    if (pthread_mutex_destroy(&sync_ctl.global_mutex) != 0)
+        stm_fatalerror("mutex destroy: %m");
+
+    memset(&sync_ctl, 0, sizeof(sync_ctl));
+}
+
+#ifndef NDEBUG
+/* debug-only: per-thread flag tracking whether this thread currently
+   holds the global mutex (maintained by s_mutex_lock/unlock) */
+__thread bool _has_mutex_here;
+static inline bool _has_mutex(void)
+{
+    return _has_mutex_here;
+}
+#endif
+
+static void set_gs_register(char *value)
+{
+    /* Point the %gs segment register at 'value' (the base of the
+       current STM segment); TLPREFIX accesses go through %gs. */
+    if (UNLIKELY(syscall(SYS_arch_prctl, ARCH_SET_GS, (uint64_t)value) != 0))
+        stm_fatalerror("syscall(arch_prctl, ARCH_SET_GS): %m");
+}
+
+static inline void s_mutex_lock(void)
+{
+    /* Acquire the global mutex; aborts on error.  The assignment
+       inside assert() keeps '_has_mutex_here' updated in debug builds
+       while compiling to nothing under NDEBUG. */
+    assert(!_has_mutex_here);
+    if (UNLIKELY(pthread_mutex_lock(&sync_ctl.global_mutex) != 0))
+        stm_fatalerror("pthread_mutex_lock: %m");
+    assert((_has_mutex_here = true, 1));
+}
+
+static inline void s_mutex_unlock(void)
+{
+    /* Release the global mutex (must be held by this thread). */
+    assert(_has_mutex_here);
+    if (UNLIKELY(pthread_mutex_unlock(&sync_ctl.global_mutex) != 0))
+        stm_fatalerror("pthread_mutex_unlock: %m");
+    assert((_has_mutex_here = false, 1));
+}
+
+/************************************************************/
+
+
+static bool acquire_thread_segment(stm_thread_local_t *tl)
+{
+    /* This function acquires a segment for the currently running thread,
+       and sets up the GS register if it changed. */
+    assert(_has_mutex());
+    assert(_is_tl_registered(tl));
+
+    int num = tl->associated_segment_num;
+    if (sync_ctl.in_use1[num - 1] == 0) {
+        /* fast-path: we can get the same segment number as the one
+           we had before. The value stored in GS is still valid. */
+#ifdef STM_TESTS
+        /* that can be optimized away, except during tests, because
+           they use only one thread */
+        set_gs_register(get_segment_base(num));
+#endif
+        dprintf(("acquired same segment: %d\n", num));
+        goto got_num;
+    }
+    /* Look for the next free segment. If there is none, wait for
+       the condition variable. */
+    int retries;
+    for (retries = 0; retries < NB_SEGMENTS; retries++) {
+        num = (num % NB_SEGMENTS) + 1;   /* cycle through 1..NB_SEGMENTS */
+        if (sync_ctl.in_use1[num - 1] == 0) {
+            /* we're getting 'num', a different number. */
+            dprintf(("acquired different segment: %d->%d\n", tl->associated_segment_num, num));
+            tl->associated_segment_num = num;
+            set_gs_register(get_segment_base(num));
+            goto got_num;
+        }
+    }
+    /* No segment available. Wait until release_thread_segment()
+       signals that one segment has been freed. */
+    abort();   /* XXX waiting is not implemented in this WIP version */
+
+    /* Return false to the caller, which will call us again */
+    return false;
+
+ got_num:
+    /* mark the segment as in use and attach 'tl' to it */
+    sync_ctl.in_use1[num - 1] = 1;
+    assert(STM_SEGMENT->segment_num == num);
+    assert(STM_SEGMENT->running_thread == NULL);
+    STM_SEGMENT->running_thread = tl;
+    return true;
+}
+
+static void release_thread_segment(stm_thread_local_t *tl)
+{
+    /* Detach 'tl' from its segment and mark the segment free again.
+       Counterpart of acquire_thread_segment(). */
+    assert(_has_mutex());
+
+    assert(STM_SEGMENT->running_thread == tl);
+    STM_SEGMENT->running_thread = NULL;
+
+    assert(sync_ctl.in_use1[tl->associated_segment_num - 1] == 1);
+    sync_ctl.in_use1[tl->associated_segment_num - 1] = 0;
+}
+
+__attribute__((unused))
+static bool _seems_to_be_running_transaction(void)
+{
+    /* debug helper: true if the current segment has a running thread */
+    return (STM_SEGMENT->running_thread != NULL);
+}
+
+bool _stm_in_transaction(stm_thread_local_t *tl)
+{
+    /* Is 'tl' the thread currently running in its associated segment? */
+    int num = tl->associated_segment_num;
+    assert(1 <= num && num <= NB_SEGMENTS);
+    return get_segment(num)->running_thread == tl;
+}
+
+void _stm_test_switch(stm_thread_local_t *tl)
+{
+    /* test-only: make 'tl' the current thread by pointing GS at its
+       segment; 'tl' must already be running a transaction there */
+    assert(_stm_in_transaction(tl));
+    set_gs_register(get_segment_base(tl->associated_segment_num));
+    assert(STM_SEGMENT->running_thread == tl);
+}
diff --git a/c8/stm/sync.h b/c8/stm/sync.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/sync.h
@@ -0,0 +1,16 @@
+static void setup_sync(void);
+static void teardown_sync(void);
+
+
+static void s_mutex_lock(void);
+static void s_mutex_unlock(void);
+#ifndef NDEBUG
+static bool _has_mutex(void);
+#endif
+static void set_gs_register(char *value);
+
+
+/* acquire and release one of the segments for running the given thread
+ (must have the mutex acquired!) */
+static bool acquire_thread_segment(stm_thread_local_t *tl);
+static void release_thread_segment(stm_thread_local_t *tl);
diff --git a/c8/stmgc.c b/c8/stmgc.c
new file mode 100644
--- /dev/null
+++ b/c8/stmgc.c
@@ -0,0 +1,19 @@
+#define _GNU_SOURCE 1
+#include "stmgc.h"
+#include "stm/atomic.h"
+#include "stm/list.h"
+#include "stm/core.h"
+#include "stm/nursery.h"
+#include "stm/sync.h"
+#include "stm/setup.h"
+#include "stm/fprintcolor.h"
+#include "stm/rewind_setjmp.h"
+
+#include "stm/list.c"
+#include "stm/nursery.c"
+#include "stm/core.c"
+#include "stm/sync.c"
+#include "stm/setup.c"
+#include "stm/fprintcolor.c"
+#include "stm/rewind_setjmp.c"
+#include "stm/misc.c"
diff --git a/c8/stmgc.h b/c8/stmgc.h
new file mode 100644
--- /dev/null
+++ b/c8/stmgc.h
@@ -0,0 +1,162 @@
+#ifndef _STMGC_H
+#define _STMGC_H
+
+
+/* ==================== INTERNAL ==================== */
+
+/* See "API" below. */
+
+
+#include <stddef.h>
+#include <stdint.h>
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include "stm/rewind_setjmp.h"
+
+#if LONG_MAX == 2147483647
+# error "Requires a 64-bit environment"
+#endif
+
+
+#define TLPREFIX __attribute__((address_space(256)))
+
+typedef TLPREFIX struct object_s object_t;
+typedef TLPREFIX struct stm_segment_info_s stm_segment_info_t;
+typedef TLPREFIX struct stm_read_marker_s stm_read_marker_t;
+typedef TLPREFIX char stm_char;
+
+struct stm_read_marker_s {
+ /* In every segment, every object has a corresponding read marker.
+ We assume that objects are at least 16 bytes long, and use
+ their address divided by 16. The read marker is equal to
+ 'STM_SEGMENT->transaction_read_version' if and only if the
+ object was read in the current transaction. The nurseries
+ also have corresponding read markers, but they are never used. */
+ uint8_t rm;
+};
+
+struct stm_segment_info_s {
+ uint8_t transaction_read_version;
+ int segment_num;
+ char *segment_base;
+ stm_char *nursery_current;
+ uintptr_t nursery_end;
+ struct stm_thread_local_s *running_thread;
+};
+#define STM_SEGMENT ((stm_segment_info_t *)4352)
+
+
+typedef struct stm_thread_local_s {
+ /* rewind_setjmp's interface */
+ rewind_jmp_thread rjthread;
+ /* the next fields are handled internally by the library */
+ int associated_segment_num;
+ struct stm_thread_local_s *prev, *next;
+ void *creating_pthread[2];
+} stm_thread_local_t;
+
+#define _STM_GCFLAG_WRITE_BARRIER 0x01
+
+
+void _stm_write_slowpath(object_t *);
+object_t *_stm_allocate_slowpath(ssize_t);
+#ifdef STM_TESTS
+#include <stdbool.h>
+bool _stm_was_read(object_t *obj);
+bool _stm_was_written(object_t *obj);
+
+char *_stm_get_segment_base(long index);
+bool _stm_in_transaction(stm_thread_local_t *tl);
+void _stm_set_nursery_free_count(uint64_t free_count);
+long _stm_count_modified_old_objects(void);
+object_t *_stm_enum_modified_old_objects(long index);
+#endif
+
+/* ==================== HELPERS ==================== */
+#ifdef NDEBUG
+#define OPT_ASSERT(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
+#else
+#define OPT_ASSERT(cond) assert(cond)
+#endif
+#define LIKELY(x) __builtin_expect(x, 1)
+#define UNLIKELY(x) __builtin_expect(x, 0)
+#define IMPLY(a, b) (!(a) || (b))
+
+
+/* ==================== PUBLIC API ==================== */
+
+/* Number of segments (i.e. how many transactions can be executed in
+ parallel, in maximum). If you try to start transactions in more
+ threads than the number of segments, it will block, waiting for the
+ next segment to become free.
+*/
+#define STM_NB_SEGMENTS 4
+
+
+struct object_s {
+ uint32_t stm_flags; /* reserved for the STM library */
+};
+
+extern ssize_t stmcb_size_rounded_up(struct object_s *);
+
+__attribute__((always_inline))
+static inline void stm_read(object_t *obj)
+{
+    /* Read barrier: set the object's read marker (at address obj >> 4,
+       see struct stm_read_marker_s above) to the current
+       transaction_read_version. */
+    ((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm =
+        STM_SEGMENT->transaction_read_version;
+}
+
+__attribute__((always_inline))
+static inline void stm_write(object_t *obj)
+{
+    /* Write barrier: take the slow path only while the
+       _STM_GCFLAG_WRITE_BARRIER flag is still set on the object. */
+    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0))
+        _stm_write_slowpath(obj);
+}
+
+
+__attribute__((always_inline))
+static inline object_t *stm_allocate(ssize_t size_rounded_up)
+{
+    /* Bump-pointer allocation in the nursery; falls back to the slow
+       path when the nursery is exhausted.  'size_rounded_up' must be
+       >= 16 and a multiple of 8. */
+    OPT_ASSERT(size_rounded_up >= 16);
+    OPT_ASSERT((size_rounded_up & 7) == 0);
+
+    stm_char *p = STM_SEGMENT->nursery_current;
+    stm_char *end = p + size_rounded_up;
+    STM_SEGMENT->nursery_current = end;
+    if (UNLIKELY((uintptr_t)end > STM_SEGMENT->nursery_end))
+        return _stm_allocate_slowpath(size_rounded_up);
+
+    return (object_t *)p;
+}
+
+
+void stm_setup(void);
+void stm_teardown(void);
+
+
+void stm_register_thread_local(stm_thread_local_t *tl);
+void stm_unregister_thread_local(stm_thread_local_t *tl);
+
+#define stm_rewind_jmp_enterprepframe(tl, rjbuf) \
+ rewind_jmp_enterprepframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack)
+#define stm_rewind_jmp_enterframe(tl, rjbuf) \
+ rewind_jmp_enterframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack)
+#define stm_rewind_jmp_leaveframe(tl, rjbuf) \
+ rewind_jmp_leaveframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack)
+#define stm_rewind_jmp_setjmp(tl) \
+ rewind_jmp_setjmp(&(tl)->rjthread, (tl)->shadowstack)
+#define stm_rewind_jmp_longjmp(tl) \
+ rewind_jmp_longjmp(&(tl)->rjthread)
+#define stm_rewind_jmp_forget(tl) \
+ rewind_jmp_forget(&(tl)->rjthread)
+
+
+long stm_start_transaction(stm_thread_local_t *tl);
+void stm_commit_transaction(void);
+
+
+/* ==================== END ==================== */
+
+#endif
diff --git a/c8/test/common.py b/c8/test/common.py
new file mode 100644
--- /dev/null
+++ b/c8/test/common.py
@@ -0,0 +1,27 @@
+import os
+import sys
+assert sys.maxint == 9223372036854775807, "requires a 64-bit environment"
+
+# ----------
+# NOTE(review): clang seems required because stmgc.h uses the
+# address_space attribute (TLPREFIX) -- confirm.
+os.environ['CC'] = 'clang'
+
+# directory containing stmgc.c / stmgc.h (the "c8" directory)
+parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+# ----------
+
+# stmgc.c is the single translation unit; it #includes all stm/*.c files
+source_files = [os.path.join(parent_dir, "stmgc.c")]
+# every .c/.h file whose modification must invalidate the cffi cache
+all_files = [os.path.join(parent_dir, "stmgc.h"),
+             os.path.join(parent_dir, "stmgc.c")] + [
+    os.path.join(parent_dir, 'stm', _n)
+    for _n in os.listdir(os.path.join(parent_dir, 'stm'))
+    if (_n.endswith('.h') or _n.endswith('.c')) and not _n.startswith('.')]
+
+# Remove the cached _cffi_* modules when any source file is newer than
+# the oldest cached module, forcing cffi to rebuild the extension.
+_pycache_ = os.path.join(parent_dir, 'test', '__pycache__')
+if os.path.exists(_pycache_):
+    _fs = [_f for _f in os.listdir(_pycache_) if _f.startswith('_cffi_')]
+    if _fs:
+        _fsmtime = min(os.stat(os.path.join(_pycache_, _f)).st_mtime
+                       for _f in _fs)
+        if any(os.stat(src).st_mtime >= _fsmtime for src in all_files):
+            import shutil
+            shutil.rmtree(_pycache_)
diff --git a/c8/test/support.py b/c8/test/support.py
new file mode 100644
--- /dev/null
+++ b/c8/test/support.py
@@ -0,0 +1,463 @@
+import os
+import cffi, weakref
+from common import parent_dir, source_files
+
+# ----------
+
+ffi = cffi.FFI()
+ffi.cdef("""
+typedef ... object_t;
+#define SIZEOF_MYOBJ ...
+#define STM_NB_SEGMENTS ...
+#define _STM_GCFLAG_WRITE_BARRIER ...
+
+typedef struct {
+...;
+} rewind_jmp_thread;
+
+typedef struct {
+ rewind_jmp_thread rjthread;
+ int associated_segment_num;
+ struct stm_thread_local_s *prev, *next;
+ void *creating_pthread[2];
+ ...;
+} stm_thread_local_t;
+
+void stm_read(object_t *obj);
+/*void stm_write(object_t *obj); use _checked_stm_write() instead */
+object_t *stm_allocate(ssize_t size_rounded_up);
+
+void stm_setup(void);
+void stm_teardown(void);
+void stm_register_thread_local(stm_thread_local_t *tl);
+void stm_unregister_thread_local(stm_thread_local_t *tl);
+
+bool _checked_stm_write(object_t *obj);
+bool _stm_was_read(object_t *obj);
+bool _stm_was_written(object_t *obj);
+char *_stm_get_segment_base(long index);
+bool _stm_in_transaction(stm_thread_local_t *tl);
More information about the pypy-commit
mailing list