[pypy-commit] pypy stmgc-c8: import stmgc/f0d995d5609d, branch c8-locking
arigo
noreply at buildbot.pypy.org
Mon Jun 1 16:59:11 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c8
Changeset: r77743:a3a33fcdc546
Date: 2015-06-01 15:58 +0100
http://bitbucket.org/pypy/pypy/changeset/a3a33fcdc546/
Log: import stmgc/f0d995d5609d, branch c8-locking
diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-e55658d12179
+f0d995d5609d
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -50,8 +50,8 @@
char *src_segment_base = (from_segnum >= 0 ? get_segment_base(from_segnum)
: NULL);
- assert(IMPLY(from_segnum >= 0, get_priv_segment(from_segnum)->modification_lock));
- assert(STM_PSEGMENT->modification_lock);
+ assert(IMPLY(from_segnum >= 0, modification_lock_check_rdlock(from_segnum)));
+ assert(modification_lock_check_wrlock(STM_SEGMENT->segment_num));
long my_segnum = STM_SEGMENT->segment_num;
DEBUG_EXPECT_SEGFAULT(false);
@@ -131,7 +131,7 @@
struct stm_commit_log_entry_s *from,
struct stm_commit_log_entry_s *to)
{
- assert(STM_PSEGMENT->modification_lock);
+ assert(modification_lock_check_wrlock(STM_SEGMENT->segment_num));
assert(from->rev_num >= to->rev_num);
/* walk BACKWARDS the commit log and update the page 'pagenum',
initially at revision 'from', until we reach the revision 'to'. */
@@ -199,8 +199,8 @@
/* before copying anything, acquire modification locks from our and
the other segment */
- uint64_t to_lock = (1UL << copy_from_segnum)| (1UL << my_segnum);
- acquire_modification_lock_set(to_lock);
+ uint64_t to_lock = (1UL << copy_from_segnum);
+ acquire_modification_lock_set(to_lock, my_segnum);
pagecopy(get_virtual_page(my_segnum, pagenum),
get_virtual_page(copy_from_segnum, pagenum));
@@ -223,7 +223,7 @@
if (src_version->rev_num > target_version->rev_num)
go_to_the_past(pagenum, src_version, target_version);
- release_modification_lock_set(to_lock);
+ release_modification_lock_set(to_lock, my_segnum);
release_all_privatization_locks();
}
@@ -308,7 +308,7 @@
static void reset_modified_from_backup_copies(int segment_num); /* forward */
-static bool _stm_validate()
+static bool _stm_validate(void)
{
/* returns true if we reached a valid state, or false if
we need to abort now */
@@ -357,7 +357,7 @@
}
/* Find the set of segments we need to copy from and lock them: */
- uint64_t segments_to_lock = 1UL << my_segnum;
+ uint64_t segments_to_lock = 0;
cl = first_cl;
while ((next_cl = cl->next) != NULL) {
if (next_cl == INEV_RUNNING) {
@@ -375,8 +375,8 @@
/* HERE */
- acquire_privatization_lock(STM_SEGMENT->segment_num);
- acquire_modification_lock_set(segments_to_lock);
+ acquire_privatization_lock(my_segnum);
+ acquire_modification_lock_set(segments_to_lock, my_segnum);
/* import objects from first_cl to last_cl: */
@@ -466,8 +466,8 @@
}
/* done with modifications */
- release_modification_lock_set(segments_to_lock);
- release_privatization_lock(STM_SEGMENT->segment_num);
+ release_modification_lock_set(segments_to_lock, my_segnum);
+ release_privatization_lock(my_segnum);
}
return !needs_abort;
@@ -545,7 +545,7 @@
time" as the attach to commit log. Otherwise, another thread may
see the new CL entry, import it, look for backup copies in this
segment and find the old backup copies! */
- acquire_modification_lock(STM_SEGMENT->segment_num);
+ acquire_modification_lock_wr(STM_SEGMENT->segment_num);
}
/* try to attach to commit log: */
@@ -559,7 +559,7 @@
}
if (is_commit) {
- release_modification_lock(STM_SEGMENT->segment_num);
+ release_modification_lock_wr(STM_SEGMENT->segment_num);
/* XXX: unfortunately, if we failed to attach our CL entry,
we have to re-add the WB_EXECUTED flags before we try to
validate again because of said condition (s.a) */
@@ -596,7 +596,7 @@
list_clear(STM_PSEGMENT->modified_old_objects);
STM_PSEGMENT->last_commit_log_entry = new;
- release_modification_lock(STM_SEGMENT->segment_num);
+ release_modification_lock_wr(STM_SEGMENT->segment_num);
}
}
@@ -692,7 +692,7 @@
increment_total_allocated(slice_sz);
memcpy(bk_slice, realobj + slice_off, slice_sz);
- acquire_modification_lock(STM_SEGMENT->segment_num);
+ acquire_modification_lock_wr(STM_SEGMENT->segment_num);
/* !! follows layout of "struct stm_undo_s" !! */
STM_PSEGMENT->modified_old_objects = list_append3(
STM_PSEGMENT->modified_old_objects,
@@ -700,7 +700,7 @@
(uintptr_t)bk_slice, /* bk_addr */
NEW_SLICE(slice_off, slice_sz));
dprintf(("> append slice %p, off=%lu, sz=%lu\n", bk_slice, slice_off, slice_sz));
- release_modification_lock(STM_SEGMENT->segment_num);
+ release_modification_lock_wr(STM_SEGMENT->segment_num);
slice_off += slice_sz;
}
@@ -896,6 +896,8 @@
static void touch_all_pages_of_obj(object_t *obj, size_t obj_size)
{
+ /* XXX should it be simpler, just really trying to read a dummy
+ byte in each page? */
int my_segnum = STM_SEGMENT->segment_num;
uintptr_t end_page, first_page = ((uintptr_t)obj) / 4096UL;
@@ -1345,7 +1347,7 @@
#pragma push_macro("STM_SEGMENT")
#undef STM_PSEGMENT
#undef STM_SEGMENT
- assert(get_priv_segment(segment_num)->modification_lock);
+ assert(modification_lock_check_wrlock(segment_num));
struct stm_priv_segment_info_s *pseg = get_priv_segment(segment_num);
struct list_s *list = pseg->modified_old_objects;
@@ -1407,9 +1409,9 @@
_reset_object_cards(pseg, item, CARD_CLEAR, false, false);
});
- acquire_modification_lock(segment_num);
+ acquire_modification_lock_wr(segment_num);
reset_modified_from_backup_copies(segment_num);
- release_modification_lock(segment_num);
+ release_modification_lock_wr(segment_num);
_verify_cards_cleared_in_all_lists(pseg);
stm_thread_local_t *tl = pseg->pub.running_thread;
diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h
--- a/rpython/translator/stm/src_stm/stm/core.h
+++ b/rpython/translator/stm/src_stm/stm/core.h
@@ -74,11 +74,6 @@
struct stm_priv_segment_info_s {
struct stm_segment_info_s pub;
- /* lock protecting from concurrent modification of
- 'modified_old_objects', page-revision-changes, ...
- Always acquired in global order of segments to avoid deadlocks. */
- uint8_t modification_lock;
-
/* All the old objects (older than the current transaction) that
the current transaction attempts to modify. This is used to
track the STM status: these are old objects that where written
@@ -297,7 +292,7 @@
static void synchronize_objects_flush(void);
static void _signal_handler(int sig, siginfo_t *siginfo, void *context);
-static bool _stm_validate();
+static bool _stm_validate(void);
static inline bool was_read_remote(char *base, object_t *obj)
{
@@ -329,7 +324,7 @@
spinlock_release(get_priv_segment(segnum)->privatization_lock);
}
-static inline bool all_privatization_locks_acquired()
+static inline bool all_privatization_locks_acquired(void)
{
#ifndef NDEBUG
long l;
@@ -343,7 +338,7 @@
#endif
}
-static inline void acquire_all_privatization_locks()
+static inline void acquire_all_privatization_locks(void)
{
/* XXX: don't do for the sharing seg0 */
long l;
@@ -352,60 +347,10 @@
}
}
-static inline void release_all_privatization_locks()
+static inline void release_all_privatization_locks(void)
{
long l;
for (l = NB_SEGMENTS-1; l >= 0; l--) {
release_privatization_lock(l);
}
}
-
-
-
-/* Modification locks are used to prevent copying from a segment
- where either the revision of some pages is inconsistent with the
- rest, or the modified_old_objects list is being modified (bk_copys).
-
- Lock ordering: acquire privatization lock around acquiring a set
- of modification locks!
-*/
-
-static inline void acquire_modification_lock(int segnum)
-{
- spinlock_acquire(get_priv_segment(segnum)->modification_lock);
-}
-
-static inline void release_modification_lock(int segnum)
-{
- spinlock_release(get_priv_segment(segnum)->modification_lock);
-}
-
-static inline void acquire_modification_lock_set(uint64_t seg_set)
-{
- assert(NB_SEGMENTS <= 64);
- OPT_ASSERT(seg_set < (1 << NB_SEGMENTS));
-
- /* acquire locks in global order */
- int i;
- for (i = 0; i < NB_SEGMENTS; i++) {
- if ((seg_set & (1 << i)) == 0)
- continue;
-
- spinlock_acquire(get_priv_segment(i)->modification_lock);
- }
-}
-
-static inline void release_modification_lock_set(uint64_t seg_set)
-{
- assert(NB_SEGMENTS <= 64);
- OPT_ASSERT(seg_set < (1 << NB_SEGMENTS));
-
- int i;
- for (i = 0; i < NB_SEGMENTS; i++) {
- if ((seg_set & (1 << i)) == 0)
- continue;
-
- assert(get_priv_segment(i)->modification_lock);
- spinlock_release(get_priv_segment(i)->modification_lock);
- }
-}
diff --git a/rpython/translator/stm/src_stm/stm/forksupport.c b/rpython/translator/stm/src_stm/stm/forksupport.c
--- a/rpython/translator/stm/src_stm/stm/forksupport.c
+++ b/rpython/translator/stm/src_stm/stm/forksupport.c
@@ -120,6 +120,9 @@
just release these locks early */
s_mutex_unlock();
+ /* Re-init these locks; might be needed after a fork() */
+ setup_modification_locks();
+
/* Unregister all other stm_thread_local_t, mostly as a way to free
the memory used by the shadowstacks
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -681,7 +681,7 @@
_stm_smallmalloc_sweep();
}
-static void clean_up_commit_log_entries()
+static void clean_up_commit_log_entries(void)
{
struct stm_commit_log_entry_s *cl, *next;
diff --git a/rpython/translator/stm/src_stm/stm/locks.h b/rpython/translator/stm/src_stm/stm/locks.h
new file mode 100644
--- /dev/null
+++ b/rpython/translator/stm/src_stm/stm/locks.h
@@ -0,0 +1,124 @@
+/* Imported by rpython/translator/stm/import_stmgc.py */
+/* Modification locks protect from concurrent modification of
+ 'modified_old_objects', page-revision-changes, ...
+
+ Modification locks are used to prevent copying from a segment
+ where either the revision of some pages is inconsistent with the
+ rest, or the modified_old_objects list is being modified (bk_copys).
+
+ Lock ordering: acquire privatization lock around acquiring a set
+ of modification locks!
+*/
+
+typedef struct {
+ pthread_rwlock_t lock;
+#ifndef NDEBUG
+ volatile bool write_locked;
+#endif
+} modification_lock_t __attribute__((aligned(64)));
+
+static modification_lock_t _modlocks[NB_SEGMENTS - 1];
+
+
+static void setup_modification_locks(void)
+{
+ int i;
+ for (i = 1; i < NB_SEGMENTS; i++) {
+ if (pthread_rwlock_init(&_modlocks[i - 1].lock, NULL) != 0)
+ stm_fatalerror("pthread_rwlock_init: %m");
+ }
+}
+
+static void teardown_modification_locks(void)
+{
+ int i;
+ for (i = 1; i < NB_SEGMENTS; i++)
+ pthread_rwlock_destroy(&_modlocks[i - 1].lock);
+ memset(_modlocks, 0, sizeof(_modlocks));
+}
+
+
+static inline void acquire_modification_lock_wr(int segnum)
+{
+ if (UNLIKELY(pthread_rwlock_wrlock(&_modlocks[segnum - 1].lock) != 0))
+ stm_fatalerror("pthread_rwlock_wrlock: %m");
+#ifndef NDEBUG
+ assert(!_modlocks[segnum - 1].write_locked);
+ _modlocks[segnum - 1].write_locked = true;
+#endif
+}
+
+static inline void release_modification_lock_wr(int segnum)
+{
+#ifndef NDEBUG
+ assert(_modlocks[segnum - 1].write_locked);
+ _modlocks[segnum - 1].write_locked = false;
+#endif
+ if (UNLIKELY(pthread_rwlock_unlock(&_modlocks[segnum - 1].lock) != 0))
+ stm_fatalerror("pthread_rwlock_unlock(wr): %m");
+}
+
+static void acquire_modification_lock_set(uint64_t readset, int write)
+{
+ /* acquire the modification lock in 'read' mode for all segments
+ in 'readset', plus the modification lock in 'write' mode for
+ the segment number 'write'.
+ */
+ assert(NB_SEGMENTS <= 64);
+ OPT_ASSERT(readset < (1 << NB_SEGMENTS));
+ assert((readset & 1) == 0); /* segment numbers normally start at 1 */
+ assert(0 <= write && write < NB_SEGMENTS); /* use 0 to mean "nobody" */
+
+ /* acquire locks in global order */
+ readset |= (1UL << write);
+ int i;
+ for (i = 1; i < NB_SEGMENTS; i++) {
+ if ((readset & (1UL << i)) == 0)
+ continue;
+ if (i == write) {
+ acquire_modification_lock_wr(write);
+ }
+ else {
+ if (UNLIKELY(pthread_rwlock_rdlock(&_modlocks[i - 1].lock) != 0))
+ stm_fatalerror("pthread_rwlock_rdlock: %m");
+ }
+ }
+}
+
+static void release_modification_lock_set(uint64_t readset, int write)
+{
+ assert(NB_SEGMENTS <= 64);
+ OPT_ASSERT(readset < (1 << NB_SEGMENTS));
+
+ /* release lock order does not matter; prefer early release of
+ the write lock */
+ if (write > 0) {
+ release_modification_lock_wr(write);
+ readset &= ~(1UL << write);
+ }
+ int i;
+ for (i = 1; i < NB_SEGMENTS; i++) {
+ if ((readset & (1UL << i)) == 0)
+ continue;
+ if (UNLIKELY(pthread_rwlock_unlock(&_modlocks[i - 1].lock) != 0))
+ stm_fatalerror("pthread_rwlock_unlock(rd): %m");
+ }
+}
+
+#ifndef NDEBUG
+static bool modification_lock_check_rdlock(int segnum)
+{
+ assert(segnum > 0);
+ if (_modlocks[segnum - 1].write_locked)
+ return false;
+ if (pthread_rwlock_trywrlock(&_modlocks[segnum - 1].lock) == 0) {
+ pthread_rwlock_unlock(&_modlocks[segnum - 1].lock);
+ return false;
+ }
+ return true;
+}
+static bool modification_lock_check_wrlock(int segnum)
+{
+ return segnum == 0 || _modlocks[segnum - 1].write_locked;
+}
+#endif
diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c
--- a/rpython/translator/stm/src_stm/stm/misc.c
+++ b/rpython/translator/stm/src_stm/stm/misc.c
@@ -44,7 +44,7 @@
return obj->stm_flags & _STM_GCFLAG_CARDS_SET;
}
-long _stm_count_cl_entries()
+long _stm_count_cl_entries(void)
{
struct stm_commit_log_entry_s *cl = &commit_log_root;
@@ -115,7 +115,7 @@
return cards[get_index_to_card_index(idx)].rm;
}
-uint8_t _stm_get_transaction_read_version()
+uint8_t _stm_get_transaction_read_version(void)
{
return STM_SEGMENT->transaction_read_version;
}
@@ -124,7 +124,7 @@
static struct stm_commit_log_entry_s *_last_cl_entry;
static long _last_cl_entry_index;
-void _stm_start_enum_last_cl_entry()
+void _stm_start_enum_last_cl_entry(void)
{
_last_cl_entry = &commit_log_root;
struct stm_commit_log_entry_s *cl = &commit_log_root;
@@ -135,7 +135,7 @@
_last_cl_entry_index = 0;
}
-object_t *_stm_next_last_cl_entry()
+object_t *_stm_next_last_cl_entry(void)
{
if (_last_cl_entry == &commit_log_root)
return NULL;
@@ -150,7 +150,7 @@
}
-void _stm_smallmalloc_sweep_test()
+void _stm_smallmalloc_sweep_test(void)
{
acquire_all_privatization_locks();
_stm_smallmalloc_sweep();
diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c
--- a/rpython/translator/stm/src_stm/stm/setup.c
+++ b/rpython/translator/stm/src_stm/stm/setup.c
@@ -127,6 +127,7 @@
private range of addresses.
*/
+ setup_modification_locks();
setup_sync();
setup_nursery();
setup_gcpage();
@@ -174,6 +175,7 @@
teardown_gcpage();
teardown_smallmalloc();
teardown_pages();
+ teardown_modification_locks();
}
static void _shadowstack_trap_page(char *start, int prot)
diff --git a/rpython/translator/stm/src_stm/stmgc.c b/rpython/translator/stm/src_stm/stmgc.c
--- a/rpython/translator/stm/src_stm/stmgc.c
+++ b/rpython/translator/stm/src_stm/stmgc.c
@@ -18,6 +18,7 @@
#include "stm/marker.h"
#include "stm/rewind_setjmp.h"
#include "stm/finalizer.h"
+#include "stm/locks.h"
#include "stm/misc.c"
#include "stm/list.c"
#include "stm/smallmalloc.c"
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -57,13 +57,16 @@
typedef struct stm_thread_local_s {
/* rewind_setjmp's interface */
rewind_jmp_thread rjthread;
+ /* every thread should handle the shadow stack itself */
struct stm_shadowentry_s *shadowstack, *shadowstack_base;
-
/* a generic optional thread-local object */
object_t *thread_local_obj;
-
+ /* in case this thread runs a transaction that aborts,
+ the following raw region of memory is cleared. */
char *mem_clear_on_abort;
size_t mem_bytes_to_clear_on_abort;
+ /* after an abort, some details about the abort are stored there.
+ (this field is not modified on a successful commit) */
long last_abort__bytes_in_nursery;
/* the next fields are handled internally by the library */
int associated_segment_num;
@@ -73,34 +76,22 @@
void *creating_pthread[2];
} stm_thread_local_t;
-#ifndef _STM_NURSERY_ZEROED
-#define _STM_NURSERY_ZEROED 0
-#endif
-#define _STM_GCFLAG_WRITE_BARRIER 0x01
-#define _STM_FAST_ALLOC (66*1024)
-#define _STM_NSE_SIGNAL_ABORT 1
-#define _STM_NSE_SIGNAL_MAX 2
-
-#define _STM_CARD_MARKED 1 /* should always be 1... */
-#define _STM_GCFLAG_CARDS_SET 0x8
-#define _STM_CARD_BITS 5 /* must be 5/6/7 for the pypy jit */
-#define _STM_CARD_SIZE (1 << _STM_CARD_BITS)
-#define _STM_MIN_CARD_COUNT 17
-#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
-
+/* this should use llvm's coldcc calling convention,
+ but it's not exposed to C code so far */
void _stm_write_slowpath(object_t *);
void _stm_write_slowpath_card(object_t *, uintptr_t);
object_t *_stm_allocate_slowpath(ssize_t);
object_t *_stm_allocate_external(ssize_t);
void _stm_become_inevitable(const char*);
-void _stm_collectable_safe_point();
+void _stm_collectable_safe_point(void);
+/* for tests, but also used in duhton: */
object_t *_stm_allocate_old(ssize_t size_rounded_up);
char *_stm_real_address(object_t *o);
#ifdef STM_TESTS
#include <stdbool.h>
-uint8_t _stm_get_transaction_read_version();
+uint8_t _stm_get_transaction_read_version(void);
uint8_t _stm_get_card_value(object_t *obj, long idx);
bool _stm_was_read(object_t *obj);
bool _stm_was_written(object_t *obj);
@@ -137,14 +128,32 @@
long _stm_count_objects_pointing_to_nursery(void);
object_t *_stm_enum_modified_old_objects(long index);
object_t *_stm_enum_objects_pointing_to_nursery(long index);
-object_t *_stm_next_last_cl_entry();
-void _stm_start_enum_last_cl_entry();
-long _stm_count_cl_entries();
+object_t *_stm_next_last_cl_entry(void);
+void _stm_start_enum_last_cl_entry(void);
+long _stm_count_cl_entries(void);
long _stm_count_old_objects_with_cards_set(void);
object_t *_stm_enum_old_objects_with_cards_set(long index);
uint64_t _stm_total_allocated(void);
#endif
+
+#ifndef _STM_NURSERY_ZEROED
+#define _STM_NURSERY_ZEROED 0
+#endif
+
+#define _STM_GCFLAG_WRITE_BARRIER 0x01
+#define _STM_FAST_ALLOC (66*1024)
+#define _STM_NSE_SIGNAL_ABORT 1
+#define _STM_NSE_SIGNAL_MAX 2
+
+#define _STM_CARD_MARKED 1 /* should always be 1... */
+#define _STM_GCFLAG_CARDS_SET 0x8
+#define _STM_CARD_BITS 5 /* must be 5/6/7 for the pypy jit */
+#define _STM_CARD_SIZE (1 << _STM_CARD_BITS)
+#define _STM_MIN_CARD_COUNT 17
+#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
+
+
/* ==================== HELPERS ==================== */
#ifdef NDEBUG
#define OPT_ASSERT(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
@@ -165,30 +174,32 @@
*/
#define STM_NB_SEGMENTS 4
+/* Structure of objects
+ --------------------
+ Objects manipulated by the user program, and managed by this library,
+ must start with a "struct object_s" field. Pointers to any user object
+ must use the "TLPREFIX struct foo *" type --- don't forget TLPREFIX.
+ The best is to use typedefs like above.
+
+ The object_s part contains some fields reserved for the STM library.
+ Right now this is only four bytes.
+*/
struct object_s {
uint32_t stm_flags; /* reserved for the STM library */
};
-extern ssize_t stmcb_size_rounded_up(struct object_s *);
-void stmcb_trace(struct object_s *obj, void visit(object_t **));
-/* a special trace-callback that is only called for the marked
- ranges of indices (using stm_write_card(o, index)) */
-extern void stmcb_trace_cards(struct object_s *, void (object_t **),
- uintptr_t start, uintptr_t stop);
-/* this function will be called on objects that support cards.
- It returns the base_offset (in bytes) inside the object from
- where the indices start, and item_size (in bytes) for the size of
- one item */
-extern void stmcb_get_card_base_itemsize(struct object_s *,
- uintptr_t offset_itemsize[2]);
-/* returns whether this object supports cards. we will only call
- stmcb_get_card_base_itemsize on objs that do so. */
-extern long stmcb_obj_supports_cards(struct object_s *);
-
-
-
+/* The read barrier must be called whenever the object 'obj' is read.
+ It is not required to call it before reading: it can be delayed for a
+ bit, but we must still be in the same "scope": no allocation, no
+ transaction commit, nothing that can potentially collect or do a safe
+ point (like stm_write() on a different object). Also, if we might
+ have finished the transaction and started the next one, then
+ stm_read() needs to be called again. It can be omitted if
+ stm_write() is called, or immediately after getting the object from
+ stm_allocate(), as long as the rules above are respected.
+*/
__attribute__((always_inline))
static inline void stm_read(object_t *obj)
{
@@ -199,6 +210,11 @@
#define _STM_WRITE_CHECK_SLOWPATH(obj) \
UNLIKELY(((obj)->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)
+/* The write barrier must be called *before* doing any change to the
+ object 'obj'. If we might have finished the transaction and started
+ the next one, then stm_write() needs to be called again. It is not
+ necessary to call it immediately after stm_allocate().
+*/
__attribute__((always_inline))
static inline void stm_write(object_t *obj)
{
@@ -206,7 +222,14 @@
_stm_write_slowpath(obj);
}
-
+/* The following is a GC-optimized barrier that works on the granularity
+ of CARD_SIZE. It can be used on any array object, but it is only
+ useful with those that were internally marked with GCFLAG_HAS_CARDS.
+ It has the same purpose as stm_write() for TM and allows write-access
+ to a part of an object/array.
+ 'index' is the array-item-based position within the object, which
+ is measured in units returned by stmcb_get_card_base_itemsize().
+*/
__attribute__((always_inline))
static inline void stm_write_card(object_t *obj, uintptr_t index)
{
@@ -245,7 +268,34 @@
}
}
+/* Must be provided by the user of this library.
+ The "size rounded up" must be a multiple of 8 and at least 16.
+ "Tracing" an object means enumerating all GC references in it,
+ by invoking the callback passed as argument.
+*/
+extern ssize_t stmcb_size_rounded_up(struct object_s *);
+void stmcb_trace(struct object_s *obj, void visit(object_t **));
+/* a special trace-callback that is only called for the marked
+ ranges of indices (using stm_write_card(o, index)) */
+extern void stmcb_trace_cards(struct object_s *, void (object_t **),
+ uintptr_t start, uintptr_t stop);
+/* this function will be called on objects that support cards.
+ It returns the base_offset (in bytes) inside the object from
+ where the indices start, and item_size (in bytes) for the size of
+ one item */
+extern void stmcb_get_card_base_itemsize(struct object_s *,
+ uintptr_t offset_itemsize[2]);
+/* returns whether this object supports cards. we will only call
+ stmcb_get_card_base_itemsize on objs that do so. */
+extern long stmcb_obj_supports_cards(struct object_s *);
+
+
+
+/* Allocate an object of the given size, which must be a multiple
+ of 8 and at least 16. In the fast-path, this is inlined to just
+ a few assembler instructions.
+*/
__attribute__((always_inline))
static inline object_t *stm_allocate(ssize_t size_rounded_up)
{
@@ -267,21 +317,48 @@
return (object_t *)p;
}
-
+/* Allocate a weakref object. Weakref objects have a
+ reference to an object at the byte-offset
+ stmcb_size_rounded_up(obj) - sizeof(void*)
+ You must assign the reference before the next collection may happen.
+ After that, you must not mutate the reference anymore. However,
+ it can become NULL after any GC if the reference dies during that
+ collection.
+ NOTE: For performance, we assume stmcb_size_rounded_up(weakref)==16
+*/
object_t *stm_allocate_weakref(ssize_t size_rounded_up);
+/* stm_setup() needs to be called once at the beginning of the program.
+ stm_teardown() can be called at the end, but that's not necessary
+ and rather meant for tests.
+ */
void stm_setup(void);
void stm_teardown(void);
+/* The size of each shadow stack, in number of entries.
+ Must be big enough to accomodate all STM_PUSH_ROOTs! */
+   Must be big enough to accommodate all STM_PUSH_ROOTs! */
+#define STM_SHADOW_STACK_DEPTH 163840
+
+/* Push and pop roots from/to the shadow stack. Only allowed inside
+ transaction. */
#define STM_PUSH_ROOT(tl, p) ((tl).shadowstack++->ss = (object_t *)(p))
#define STM_POP_ROOT(tl, p) ((p) = (typeof(p))((--(tl).shadowstack)->ss))
#define STM_POP_ROOT_RET(tl) ((--(tl).shadowstack)->ss)
+/* Every thread needs to have a corresponding stm_thread_local_t
+ structure. It may be a "__thread" global variable or something else.
+ Use the following functions at the start and at the end of a thread.
+ The user of this library needs to maintain the two shadowstack fields;
+ at any call to stm_allocate(), these fields should point to a range
+ of memory that can be walked in order to find the stack roots.
+*/
void stm_register_thread_local(stm_thread_local_t *tl);
void stm_unregister_thread_local(stm_thread_local_t *tl);
+/* At some key places, like the entry point of the thread and in the
+ function with the interpreter's dispatch loop, you need to declare
+ a local variable of type 'rewind_jmp_buf' and call these macros. */
#define stm_rewind_jmp_enterprepframe(tl, rjbuf) \
rewind_jmp_enterprepframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack)
#define stm_rewind_jmp_enterframe(tl, rjbuf) \
@@ -303,37 +380,23 @@
rewind_jmp_enum_shadowstack(&(tl)->rjthread, callback)
+/* Starting and ending transactions. stm_read(), stm_write() and
+ stm_allocate() should only be called from within a transaction.
+ The stm_start_transaction() call returns the number of times it
+ returned, starting at 0. If it is > 0, then the transaction was
+ aborted and restarted this number of times. */
long stm_start_transaction(stm_thread_local_t *tl);
void stm_start_inevitable_transaction(stm_thread_local_t *tl);
-
void stm_commit_transaction(void);
/* Temporary fix? Call this outside a transaction. If there is an
inevitable transaction running somewhere else, wait until it finishes. */
void stm_wait_for_current_inevitable_transaction(void);
+/* Abort the currently running transaction. This function never
+ returns: it jumps back to the stm_start_transaction(). */
void stm_abort_transaction(void) __attribute__((noreturn));
-void stm_collect(long level);
-
-long stm_identityhash(object_t *obj);
-long stm_id(object_t *obj);
-void stm_set_prebuilt_identityhash(object_t *obj, long hash);
-
-long stm_can_move(object_t *obj);
-
-object_t *stm_setup_prebuilt(object_t *);
-object_t *stm_setup_prebuilt_weakref(object_t *);
-
-long stm_call_on_abort(stm_thread_local_t *, void *key, void callback(void *));
-long stm_call_on_commit(stm_thread_local_t *, void *key, void callback(void *));
-
-static inline void stm_safe_point(void) {
- if (STM_SEGMENT->nursery_end <= _STM_NSE_SIGNAL_MAX)
- _stm_collectable_safe_point();
-}
-
-
#ifdef STM_NO_AUTOMATIC_SETJMP
int stm_is_inevitable(void);
#else
@@ -341,6 +404,10 @@
return !rewind_jmp_armed(&STM_SEGMENT->running_thread->rjthread);
}
#endif
+
+/* Turn the current transaction inevitable.
+ stm_become_inevitable() itself may still abort the transaction instead
+ of returning. */
static inline void stm_become_inevitable(stm_thread_local_t *tl,
const char* msg) {
assert(STM_SEGMENT->running_thread == tl);
@@ -348,7 +415,64 @@
_stm_become_inevitable(msg);
}
+/* Forces a safe-point if needed. Normally not needed: this is
+ automatic if you call stm_allocate(). */
+static inline void stm_safe_point(void) {
+ if (STM_SEGMENT->nursery_end <= _STM_NSE_SIGNAL_MAX)
+ _stm_collectable_safe_point();
+}
+
+/* Forces a collection. */
+void stm_collect(long level);
+
+
+/* Prepare an immortal "prebuilt" object managed by the GC. Takes a
+ pointer to an 'object_t', which should not actually be a GC-managed
+ structure but a real static structure. Returns the equivalent
+ GC-managed pointer. Works by copying it into the GC pages, following
+ and fixing all pointers it contains, by doing stm_setup_prebuilt() on
+ each of them recursively. (Note that this will leave garbage in the
+ static structure, but it should never be used anyway.) */
+object_t *stm_setup_prebuilt(object_t *);
+/* The same, if the prebuilt object is actually a weakref. */
+object_t *stm_setup_prebuilt_weakref(object_t *);
+
+/* Hash, id. The id is just the address of the object (of the address
+ where it *will* be after the next minor collection). The hash is the
+ same, mangled -- except on prebuilt objects, where it can be
+   controlled for each prebuilt object individually. (Useful for PyPy) */
+long stm_identityhash(object_t *obj);
+long stm_id(object_t *obj);
+void stm_set_prebuilt_identityhash(object_t *obj, long hash);
+
+/* Returns 1 if the object can still move (it's in the nursery), or 0
+ otherwise. After a minor collection no object can move any more. */
+long stm_can_move(object_t *obj);
+
+/* If the current transaction aborts later, invoke 'callback(key)'. If
+ the current transaction commits, then the callback is forgotten. You
+ can only register one callback per key. You can call
+ 'stm_call_on_abort(key, NULL)' to cancel an existing callback
+ (returns 0 if there was no existing callback to cancel).
+ Note: 'key' must be aligned to a multiple of 8 bytes. */
+long stm_call_on_abort(stm_thread_local_t *, void *key, void callback(void *));
+/* If the current transaction commits later, invoke 'callback(key)'. If
+ the current transaction aborts, then the callback is forgotten. Same
+ restrictions as stm_call_on_abort(). If the transaction is or becomes
+ inevitable, 'callback(key)' is called immediately. */
+long stm_call_on_commit(stm_thread_local_t *, void *key, void callback(void *));
+
+
+/* Similar to stm_become_inevitable(), but additionally suspend all
+ other threads. A very heavy-handed way to make sure that no other
+ transaction is running concurrently. Avoid as much as possible.
+ Other transactions will continue running only after this transaction
+ commits. (xxx deprecated and may be removed) */
void stm_become_globally_unique_transaction(stm_thread_local_t *tl, const char *msg);
+
+/* Moves the transaction forward in time by validating the read and
+ write set with all commits that happened since the last validation
+ (explicit or implicit). */
void stm_validate(void);
/* Temporarily stop all the other threads, by waiting until they
@@ -407,8 +531,8 @@
/* The markers pushed in the shadowstack are an odd number followed by a
regular object pointer. */
typedef struct {
- uintptr_t odd_number;
- object_t *object;
+ uintptr_t odd_number; /* marker odd number, or 0 if marker is missing */
+ object_t *object; /* marker object, or NULL if marker is missing */
} stm_loc_marker_t;
extern void (*stmcb_timing_event)(stm_thread_local_t *tl, /* the local thread */
enum stm_event_e event,
More information about the pypy-commit
mailing list