[pypy-commit] pypy stmgc-c7: import stmgc/a158a889e78b and remove the spinlock_acquire()
Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7
Changeset: r70614:06e3816b1e7b
Date: 2014-04-13 18:41 +0200
http://bitbucket.org/pypy/pypy/changeset/06e3816b1e7b/
Log: import stmgc/a158a889e78b and remove the spinlock_acquire() function
from stmgcintf.h
diff --git a/rpython/translator/c/src/mem.c b/rpython/translator/c/src/mem.c
--- a/rpython/translator/c/src/mem.c
+++ b/rpython/translator/c/src/mem.c
@@ -48,7 +48,7 @@
// spinlock_acquire/spinlock_release defined in ../../stm/src_stm/stmgcintf.h
static Signed pypy_debug_alloc_lock = 0;
#else
-# define spinlock_acquire(lock, targetvalue) /* nothing */
+# define spinlock_acquire(lock) /* nothing */
# define spinlock_release(lock) /* nothing */
#endif
@@ -58,7 +58,7 @@
RPyAssert(p, "out of memory");
p->addr = addr;
p->funcname = funcname;
- spinlock_acquire(pypy_debug_alloc_lock, '+');
+ spinlock_acquire(pypy_debug_alloc_lock);
p->next = pypy_debug_alloc_list;
pypy_debug_alloc_list = p;
spinlock_release(pypy_debug_alloc_lock);
@@ -67,7 +67,7 @@
int try_pypy_debug_alloc_stop(void *addr)
{
struct pypy_debug_alloc_s **p;
- spinlock_acquire(pypy_debug_alloc_lock, '-');
+ spinlock_acquire(pypy_debug_alloc_lock);
for (p = &pypy_debug_alloc_list; *p; p = &((*p)->next))
if ((*p)->addr == addr)
{
@@ -92,7 +92,7 @@
{
long count = 0;
struct pypy_debug_alloc_s *p;
- spinlock_acquire(pypy_debug_alloc_lock, 'R');
+ spinlock_acquire(pypy_debug_alloc_lock);
for (p = pypy_debug_alloc_list; p; p = p->next)
count++;
if (count > 0)
diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-0492e398156b
+a158a889e78b
diff --git a/rpython/translator/stm/src_stm/stm/atomic.h b/rpython/translator/stm/src_stm/stm/atomic.h
--- a/rpython/translator/stm/src_stm/stm/atomic.h
+++ b/rpython/translator/stm/src_stm/stm/atomic.h
@@ -37,4 +37,12 @@
#endif
+#define spinlock_acquire(lock) \
+ do { if (LIKELY(__sync_lock_test_and_set(&(lock), 1) == 0)) break; \
+ spin_loop(); } while (1)
+#define spinlock_release(lock) \
+ do { assert((lock) == 1); \
+ __sync_lock_release(&(lock)); } while (0)
+
+
#endif /* _STM_ATOMIC_H */
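
The pair of macros added above is the locking primitive that the rest of
this patch builds on: a GCC __sync_lock_test_and_set() /
__sync_lock_release() test-and-set spinlock.  A minimal standalone sketch
of the same pattern (not part of the patch; LIKELY() and spin_loop() are
assumed to expand roughly as in atomic.h, with an x86 "pause" hint):

    /* build with: gcc -pthread spinlock_demo.c */
    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    #define LIKELY(x)    __builtin_expect(!!(x), 1)
    #define spin_loop()  __asm__ __volatile__ ("pause" : : : "memory")

    #define spinlock_acquire(lock)                                         \
        do { if (LIKELY(__sync_lock_test_and_set(&(lock), 1) == 0)) break; \
             spin_loop(); } while (1)
    #define spinlock_release(lock)                                         \
        do { assert((lock) == 1);                                          \
             __sync_lock_release(&(lock)); } while (0)

    static char lock = 0;          /* 0 = free, 1 = held */
    static long counter = 0;

    static void *worker(void *arg)
    {
        int i;
        (void)arg;
        for (i = 0; i < 100000; i++) {
            spinlock_acquire(lock);
            counter++;             /* protected by the spinlock */
            spinlock_release(lock);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;
        pthread_create(&t1, NULL, worker, NULL);
        pthread_create(&t2, NULL, worker, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        printf("counter = %ld\n", counter);   /* expected: 200000 */
        return 0;
    }

Without the lock, the two unsynchronized increments race and the final
count is typically below 200000.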
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -9,6 +9,23 @@
memset(write_locks, 0, sizeof(write_locks));
}
+#ifdef NDEBUG
+#define EVENTUALLY(condition) /* nothing */
+#else
+#define EVENTUALLY(condition) \
+ { \
+ if (!(condition)) { \
+ int _i; \
+ for (_i = 1; _i <= NB_SEGMENTS; _i++) \
+ spinlock_acquire(lock_pages_privatizing[_i]); \
+ if (!(condition)) \
+ stm_fatalerror("fails: " #condition); \
+ for (_i = 1; _i <= NB_SEGMENTS; _i++) \
+ spinlock_release(lock_pages_privatizing[_i]); \
+ } \
+ }
+#endif
+
static void check_flag_write_barrier(object_t *obj)
{
/* check that all copies of the object, apart from mine, have the
@@ -22,12 +39,7 @@
if (i == STM_SEGMENT->segment_num)
continue;
o1 = (struct object_s *)REAL_ADDRESS(get_segment_base(i), obj);
- if (!(o1->stm_flags & GCFLAG_WRITE_BARRIER)) {
- mutex_pages_lock(); /* try again... */
- if (!(o1->stm_flags & GCFLAG_WRITE_BARRIER))
- stm_fatalerror("missing GCFLAG_WRITE_BARRIER");
- mutex_pages_unlock();
- }
+ EVENTUALLY(o1->stm_flags & GCFLAG_WRITE_BARRIER);
}
#endif
}
@@ -272,7 +284,6 @@
with synchronize_object_now() but I don't completely see how to
improve...
*/
- assert(_has_mutex_pages());
assert(!_is_young(obj));
char *segment_base = get_segment_base(source_segment_num);
@@ -327,10 +338,7 @@
/* Copy around the version of 'obj' that lives in our own segment.
It is first copied into the shared pages, and then into other
segments' own private pages.
-
- This must be called with the mutex_pages_lock!
*/
- assert(_has_mutex_pages());
assert(!_is_young(obj));
assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
@@ -374,9 +382,26 @@
memcpy(dst, src, copy_size);
}
else {
- assert(memcmp(dst, src, copy_size) == 0); /* same page */
+ EVENTUALLY(memcmp(dst, src, copy_size) == 0); /* same page */
}
+ /* Do a full memory barrier. We must make sure that other
+ CPUs see the changes we did to the shared page ("S",
+ above) before we check the other segments below with
+ is_private_page(). Otherwise, we risk the following:
+ this CPU writes "S" but the writes are not visible yet;
+ then it checks is_private_page() and gets false, and does
+ nothing more; just afterwards another CPU sets its own
+ private_page bit and copies the page; but it risks doing
+ so before seeing the "S" writes.
+
+ XXX what is the cost of this? If it's high, then we
+ should reorganize the code so that we buffer the second
+ parts and do them by bunch of N, after just one call to
+ __sync_synchronize()...
+ */
+ __sync_synchronize();
+
for (i = 1; i <= NB_SEGMENTS; i++) {
if (i == myself)
continue;
@@ -393,7 +418,7 @@
memcpy(dst, src, copy_size);
}
else {
- assert(memcmp(dst, src, copy_size) == 0); /* same page */
+ EVENTUALLY(!memcmp(dst, src, copy_size)); /* same page */
}
}
@@ -486,12 +511,10 @@
major_collection_now_at_safe_point();
/* synchronize overflow objects living in privatized pages */
- mutex_pages_lock();
push_overflow_objects_from_privatized_pages();
/* synchronize modified old objects to other threads */
push_modified_to_other_segments();
- mutex_pages_unlock();
/* update 'overflow_number' if needed */
if (STM_PSEGMENT->overflow_number_has_been_used) {
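
Both the new EVENTUALLY() macro and the long comment before
__sync_synchronize() above are about the same store/load reordering
hazard: the writes to the shared copy "S" must be globally visible before
this thread reads the other segments' privatization bits.  An illustrative
sketch of that hazard, reduced to one committing thread and one
privatizing thread (all names below are invented for the example and do
not appear in the stmgc sources):

    #include <stdbool.h>
    #include <string.h>

    static char shared_page[4096];          /* plays the role of "S" */
    static volatile bool page_is_private;   /* plays the role of the
                                               pages_privatized bit */

    /* committing thread: update the shared copy, then decide whether a
       private copy also needs an explicit update */
    void writer(const char *src)
    {
        memcpy(shared_page, src, sizeof(shared_page));   /* writes "S" */

        /* Full barrier: make the memcpy() above visible before reading
           'page_is_private'.  Without it the CPU may order the load
           before the stores: we read 'false', skip the explicit copy,
           and the privatizing thread may still copy a stale "S". */
        __sync_synchronize();

        if (page_is_private) {
            /* the other thread already advertised a private copy:
               push the new data to it explicitly (elided here) */
        }
    }

    /* privatizing thread: advertise the bit first, then copy the page */
    void privatizer(char *private_copy)
    {
        page_is_private = true;     /* set the bit _before_ copying */
        __sync_synchronize();
        memcpy(private_copy, shared_page, sizeof(shared_page));
    }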
diff --git a/rpython/translator/stm/src_stm/stm/forksupport.c b/rpython/translator/stm/src_stm/stm/forksupport.c
--- a/rpython/translator/stm/src_stm/stm/forksupport.c
+++ b/rpython/translator/stm/src_stm/stm/forksupport.c
@@ -71,7 +71,6 @@
s_mutex_lock();
synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK);
- mutex_pages_lock();
/* Make a new mmap at some other address, but of the same size as
the standard mmap at stm_object_pages
@@ -167,7 +166,6 @@
fork_big_copy = NULL;
bool was_in_transaction = fork_was_in_transaction;
- mutex_pages_unlock();
s_mutex_unlock();
if (!was_in_transaction) {
@@ -204,7 +202,6 @@
/* this new process contains no other thread, so we can
just release these locks early */
- mutex_pages_unlock();
s_mutex_unlock();
/* Move the copy of the mmap over the old one, overwriting it
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -39,6 +39,7 @@
static void grab_more_free_pages_for_small_allocations(void)
{
+ abort();//XXX
/* grab N (= GCPAGE_NUM_PAGES) pages out of the top addresses */
uintptr_t decrease_by = GCPAGE_NUM_PAGES * 4096;
if (uninitialized_page_stop - uninitialized_page_start <= decrease_by)
@@ -76,17 +77,22 @@
}
+static int lock_growth_large = 0;
+
static char *allocate_outside_nursery_large(uint64_t size)
{
- /* thread-safe: use the lock of pages.c to prevent any remapping
- from occurring under our feet */
- mutex_pages_lock();
-
/* Allocate the object with largemalloc.c from the lower addresses. */
char *addr = _stm_large_malloc(size);
if (addr == NULL)
stm_fatalerror("not enough memory!");
+ if (LIKELY(addr + size <= uninitialized_page_start)) {
+ return addr;
+ }
+
+ /* uncommon case: need to initialize some more pages */
+ spinlock_acquire(lock_growth_large);
+
if (addr + size > uninitialized_page_start) {
uintptr_t npages;
npages = (addr + size - uninitialized_page_start) / 4096UL;
@@ -96,11 +102,10 @@
stm_fatalerror("out of memory!"); /* XXX */
}
setup_N_pages(uninitialized_page_start, npages);
+ __sync_synchronize();
uninitialized_page_start += npages * 4096UL;
}
-
- mutex_pages_unlock();
-
+ spinlock_release(lock_growth_large);
return addr;
}
@@ -256,7 +261,6 @@
total_allocated by 4096. */
long i;
- mutex_pages_lock();
for (i = 1; i <= NB_SEGMENTS; i++) {
/* The 'modified_old_objects' list gives the list of objects
@@ -306,7 +310,6 @@
for (i = 1; i <= NB_SEGMENTS; i++) {
major_restore_private_bits_for_modified_objects(i);
}
- mutex_pages_unlock();
}
@@ -465,9 +468,7 @@
static void sweep_large_objects(void)
{
- mutex_pages_lock();
_stm_largemalloc_sweep();
- mutex_pages_unlock();
}
static void clean_write_locks(void)
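
allocate_outside_nursery_large() above now takes its spinlock only on the
uncommon path where fresh pages have to be set up, and it advances
uninitialized_page_start only after a full barrier, so a thread on the
fast path can never see the pointer moved past pages that are not yet
initialized.  A sketch of that "fast path unlocked, grow under a spinlock,
publish after a barrier" pattern (all names invented for the example):

    #include <stdint.h>

    #define spinlock_acquire(lock)                                   \
        do { if (__sync_lock_test_and_set(&(lock), 1) == 0) break;   \
        } while (1)
    #define spinlock_release(lock)  __sync_lock_release(&(lock))

    static char *volatile initialized_end;   /* grows monotonically */
    static int growth_lock = 0;

    static void initialize_pages(char *start, uintptr_t npages)
    {
        /* map/zero the new pages; elided in this sketch */
        (void)start; (void)npages;
    }

    /* 'addr' was just handed out by the underlying malloc; make sure
       the pages backing [addr, addr+size) are initialized */
    char *ensure_initialized(char *addr, uint64_t size)
    {
        /* fast path: the object fits in already-initialized pages */
        if (addr + size <= initialized_end)
            return addr;

        /* slow path: initialize more pages under the spinlock */
        spinlock_acquire(growth_lock);
        if (addr + size > initialized_end) {    /* re-check under lock */
            uintptr_t npages =
                (uintptr_t)(addr + size - initialized_end + 4095) / 4096;
            initialize_pages(initialized_end, npages);

            /* make the initialization visible before publishing the
               new end pointer to fast-path readers */
            __sync_synchronize();
            initialized_end += npages * 4096;
        }
        spinlock_release(growth_lock);
        return addr;
    }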
diff --git a/rpython/translator/stm/src_stm/stm/largemalloc.c b/rpython/translator/stm/src_stm/stm/largemalloc.c
--- a/rpython/translator/stm/src_stm/stm/largemalloc.c
+++ b/rpython/translator/stm/src_stm/stm/largemalloc.c
@@ -107,20 +107,35 @@
*/
-static dlist_t largebins[N_BINS];
-static mchunk_t *first_chunk, *last_chunk;
+
+static struct {
+ int lock;
+ mchunk_t *first_chunk, *last_chunk;
+ dlist_t largebins[N_BINS];
+} lm __attribute__((aligned(64)));
+
+
+static void lm_lock(void)
+{
+ spinlock_acquire(lm.lock);
+}
+
+static void lm_unlock(void)
+{
+ spinlock_release(lm.lock);
+}
static void insert_unsorted(mchunk_t *new)
{
size_t index = LAST_BIN_INDEX(new->size) ? N_BINS - 1
: largebin_index(new->size);
- new->d.next = &largebins[index];
- new->d.prev = largebins[index].prev;
+ new->d.next = &lm.largebins[index];
+ new->d.prev = lm.largebins[index].prev;
new->d.prev->next = &new->d;
new->u.up = UU_UNSORTED;
new->u.down = NULL;
- largebins[index].prev = &new->d;
+ lm.largebins[index].prev = &new->d;
}
static int compare_chunks(const void *vchunk1, const void *vchunk2)
@@ -140,8 +155,8 @@
static void really_sort_bin(size_t index)
{
- dlist_t *unsorted = largebins[index].prev;
- dlist_t *end = &largebins[index];
+ dlist_t *unsorted = lm.largebins[index].prev;
+ dlist_t *end = &lm.largebins[index];
dlist_t *scan = unsorted->prev;
size_t count = 1;
while (scan != end && data2chunk(scan)->u.up == UU_UNSORTED) {
@@ -177,7 +192,7 @@
chunk1 = chunks[--count];
}
size_t search_size = chunk1->size;
- dlist_t *head = largebins[index].next;
+ dlist_t *head = lm.largebins[index].next;
while (1) {
if (head == end || data2chunk(head)->size < search_size) {
@@ -219,8 +234,8 @@
static void sort_bin(size_t index)
{
- dlist_t *last = largebins[index].prev;
- if (last != &largebins[index] && data2chunk(last)->u.up == UU_UNSORTED)
+ dlist_t *last = lm.largebins[index].prev;
+ if (last != &lm.largebins[index] && data2chunk(last)->u.up == UU_UNSORTED)
really_sort_bin(index);
}
@@ -263,13 +278,15 @@
if (request_size < MIN_ALLOC_SIZE)
request_size = MIN_ALLOC_SIZE;
+ lm_lock();
+
size_t index = largebin_index(request_size);
sort_bin(index);
/* scan through the chunks of current bin in reverse order
to find the smallest that fits. */
- dlist_t *scan = largebins[index].prev;
- dlist_t *end = &largebins[index];
+ dlist_t *scan = lm.largebins[index].prev;
+ dlist_t *end = &lm.largebins[index];
mchunk_t *mscan;
while (scan != end) {
mscan = data2chunk(scan);
@@ -287,16 +304,17 @@
smallest item of the first non-empty bin, as it will be large
enough. */
while (++index < N_BINS) {
- if (largebins[index].prev != &largebins[index]) {
+ if (lm.largebins[index].prev != &lm.largebins[index]) {
/* non-empty bin. */
sort_bin(index);
- scan = largebins[index].prev;
+ scan = lm.largebins[index].prev;
mscan = data2chunk(scan);
goto found;
}
}
/* not enough memory. */
+ lm_unlock();
return NULL;
found:
@@ -337,12 +355,13 @@
mscan->prev_size = BOTH_CHUNKS_USED;
increment_total_allocated(request_size + LARGE_MALLOC_OVERHEAD);
+ lm_unlock();
+
return (char *)&mscan->d;
}
-void _stm_large_free(char *data)
+static void _large_free(mchunk_t *chunk)
{
- mchunk_t *chunk = data2chunk(data);
assert((chunk->size & (sizeof(char *) - 1)) == 0);
assert(chunk->prev_size != THIS_CHUNK_FREE);
@@ -350,9 +369,12 @@
increment_total_allocated(-(chunk->size + LARGE_MALLOC_OVERHEAD));
#ifndef NDEBUG
- assert(chunk->size >= sizeof(dlist_t));
- assert(chunk->size <= (((char *)last_chunk) - (char *)data));
- memset(data, 0xDE, chunk->size);
+ {
+ char *data = (char *)&chunk->d;
+ assert(chunk->size >= sizeof(dlist_t));
+ assert(chunk->size <= (((char *)lm.last_chunk) - data));
+ memset(data, 0xDE, chunk->size);
+ }
#endif
/* try to merge with the following chunk in memory */
@@ -409,10 +431,18 @@
insert_unsorted(chunk);
}
+void _stm_large_free(char *data)
+{
+ lm_lock();
+ _large_free(data2chunk(data));
+ lm_unlock();
+}
+
void _stm_large_dump(void)
{
- char *data = ((char *)first_chunk) + 16;
+ lm_lock();
+ char *data = ((char *)lm.first_chunk) + 16;
size_t prev_size_if_free = 0;
fprintf(stderr, "\n");
while (1) {
@@ -447,12 +477,13 @@
data += 16;
}
fprintf(stderr, "\n %p: end. ]\n\n", data - 8);
- assert(data - 16 == (char *)last_chunk);
+ assert(data - 16 == (char *)lm.last_chunk);
+ lm_unlock();
}
char *_stm_largemalloc_data_start(void)
{
- return (char *)first_chunk;
+ return (char *)lm.first_chunk;
}
#ifdef STM_LARGEMALLOC_TEST
@@ -463,21 +494,23 @@
{
int i;
for (i = 0; i < N_BINS; i++) {
- largebins[i].prev = &largebins[i];
- largebins[i].next = &largebins[i];
+ lm.largebins[i].prev = &lm.largebins[i];
+ lm.largebins[i].next = &lm.largebins[i];
}
assert(data_size >= 2 * sizeof(struct malloc_chunk));
assert((data_size & 31) == 0);
- first_chunk = (mchunk_t *)data_start;
- first_chunk->prev_size = THIS_CHUNK_FREE;
- first_chunk->size = data_size - 2 * CHUNK_HEADER_SIZE;
- last_chunk = chunk_at_offset(first_chunk, data_size - CHUNK_HEADER_SIZE);
- last_chunk->prev_size = first_chunk->size;
- last_chunk->size = END_MARKER;
- assert(last_chunk == next_chunk(first_chunk));
+ lm.first_chunk = (mchunk_t *)data_start;
+ lm.first_chunk->prev_size = THIS_CHUNK_FREE;
+ lm.first_chunk->size = data_size - 2 * CHUNK_HEADER_SIZE;
+ lm.last_chunk = chunk_at_offset(lm.first_chunk,
+ data_size - CHUNK_HEADER_SIZE);
+ lm.last_chunk->prev_size = lm.first_chunk->size;
+ lm.last_chunk->size = END_MARKER;
+ assert(lm.last_chunk == next_chunk(lm.first_chunk));
+ lm.lock = 0;
- insert_unsorted(first_chunk);
+ insert_unsorted(lm.first_chunk);
#ifdef STM_LARGEMALLOC_TEST
_stm_largemalloc_keep = NULL;
@@ -486,57 +519,64 @@
int _stm_largemalloc_resize_arena(size_t new_size)
{
+ int result = 0;
+ lm_lock();
+
if (new_size < 2 * sizeof(struct malloc_chunk))
- return 0;
+ goto fail;
OPT_ASSERT((new_size & 31) == 0);
new_size -= CHUNK_HEADER_SIZE;
- mchunk_t *new_last_chunk = chunk_at_offset(first_chunk, new_size);
- mchunk_t *old_last_chunk = last_chunk;
- size_t old_size = ((char *)old_last_chunk) - (char *)first_chunk;
+ mchunk_t *new_last_chunk = chunk_at_offset(lm.first_chunk, new_size);
+ mchunk_t *old_last_chunk = lm.last_chunk;
+ size_t old_size = ((char *)old_last_chunk) - (char *)lm.first_chunk;
if (new_size < old_size) {
/* check if there is enough free space at the end to allow
such a reduction */
- size_t lsize = last_chunk->prev_size;
+ size_t lsize = lm.last_chunk->prev_size;
assert(lsize != THIS_CHUNK_FREE);
if (lsize == BOTH_CHUNKS_USED)
- return 0;
+ goto fail;
lsize += CHUNK_HEADER_SIZE;
- mchunk_t *prev_chunk = chunk_at_offset(last_chunk, -lsize);
+ mchunk_t *prev_chunk = chunk_at_offset(lm.last_chunk, -lsize);
if (((char *)new_last_chunk) < ((char *)prev_chunk) +
sizeof(struct malloc_chunk))
- return 0;
+ goto fail;
/* unlink the prev_chunk from the doubly-linked list */
unlink_chunk(prev_chunk);
/* reduce the prev_chunk */
- assert(prev_chunk->size == last_chunk->prev_size);
+ assert(prev_chunk->size == lm.last_chunk->prev_size);
prev_chunk->size = ((char*)new_last_chunk) - (char *)prev_chunk
- CHUNK_HEADER_SIZE;
/* make a fresh-new last chunk */
new_last_chunk->prev_size = prev_chunk->size;
new_last_chunk->size = END_MARKER;
- last_chunk = new_last_chunk;
- assert(last_chunk == next_chunk(prev_chunk));
+ lm.last_chunk = new_last_chunk;
+ assert(lm.last_chunk == next_chunk(prev_chunk));
insert_unsorted(prev_chunk);
}
else if (new_size > old_size) {
/* make the new last chunk first, with only the extra size */
- mchunk_t *old_last_chunk = last_chunk;
+ mchunk_t *old_last_chunk = lm.last_chunk;
old_last_chunk->size = (new_size - old_size) - CHUNK_HEADER_SIZE;
new_last_chunk->prev_size = BOTH_CHUNKS_USED;
new_last_chunk->size = END_MARKER;
- last_chunk = new_last_chunk;
- assert(last_chunk == next_chunk(old_last_chunk));
+ lm.last_chunk = new_last_chunk;
+ assert(lm.last_chunk == next_chunk(old_last_chunk));
/* then free the last_chunk (turn it from "used" to "free) */
- _stm_large_free((char *)&old_last_chunk->d);
+ _large_free(old_last_chunk);
}
- return 1;
+
+ result = 1;
+ fail:
+ lm_unlock();
+ return result;
}
@@ -551,15 +591,17 @@
void _stm_largemalloc_sweep(void)
{
- /* This may be slightly optimized by inlining _stm_large_free() and
+ lm_lock();
+
+ /* This may be slightly optimized by inlining _large_free() and
making cases, e.g. we might know already if the previous block
was free or not. It's probably not really worth it. */
- mchunk_t *mnext, *chunk = first_chunk;
+ mchunk_t *mnext, *chunk = lm.first_chunk;
if (chunk->prev_size == THIS_CHUNK_FREE)
chunk = next_chunk(chunk); /* go to the first non-free chunk */
- while (chunk != last_chunk) {
+ while (chunk != lm.last_chunk) {
/* here, the chunk we're pointing to is not free */
assert(chunk->prev_size != THIS_CHUNK_FREE);
@@ -571,8 +613,10 @@
/* use the callback to know if 'chunk' contains an object that
survives or dies */
if (!_largemalloc_sweep_keep(chunk)) {
- _stm_large_free((char *)&chunk->d); /* dies */
+ _large_free(chunk); /* dies */
}
chunk = mnext;
}
+
+ lm_unlock();
}
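
The largemalloc.c changes above follow one pattern: all of the allocator's
global state moves into a single 64-byte-aligned struct whose first field
is a spinlock, the public entry points take the lock around their work,
and internal helpers like _large_free() assume it is already held.  A
standalone sketch of the same structure, with the free lists replaced by
a trivial bump allocator (names invented for the example):

    #include <stddef.h>

    #define spinlock_acquire(lock)                                   \
        do { if (__sync_lock_test_and_set(&(lock), 1) == 0) break;   \
        } while (1)
    #define spinlock_release(lock)  __sync_lock_release(&(lock))

    static struct {
        int lock;                 /* protects everything below */
        char *next_free;
        char *arena_end;
    } alloc_state __attribute__((aligned(64)));

    static void alloc_lock(void)   { spinlock_acquire(alloc_state.lock); }
    static void alloc_unlock(void) { spinlock_release(alloc_state.lock); }

    void alloc_init(char *arena, size_t size)
    {
        alloc_state.next_free = arena;
        alloc_state.arena_end = arena + size;
        alloc_state.lock = 0;
    }

    char *alloc_bytes(size_t size)
    {
        char *result = NULL;
        alloc_lock();
        if ((size_t)(alloc_state.arena_end - alloc_state.next_free) >= size) {
            result = alloc_state.next_free;
            alloc_state.next_free += size;
        }
        alloc_unlock();
        return result;            /* NULL means out of memory */
    }

The aligned(64) attribute keeps the lock in the same cache line as the
pointers it protects, mirroring the new 'lm' struct above.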
diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c
--- a/rpython/translator/stm/src_stm/stm/misc.c
+++ b/rpython/translator/stm/src_stm/stm/misc.c
@@ -76,21 +76,6 @@
uint64_t _stm_total_allocated(void)
{
- mutex_pages_lock();
- uint64_t result = increment_total_allocated(0);
- mutex_pages_unlock();
- return result;
+ return increment_total_allocated(0);
}
#endif
-
-#ifdef STM_LARGEMALLOC_TEST
-void _stm_mutex_pages_lock(void)
-{
- mutex_pages_lock();
-}
-
-void _stm_mutex_pages_unlock(void)
-{
- mutex_pages_unlock();
-}
-#endif
diff --git a/rpython/translator/stm/src_stm/stm/nursery.c b/rpython/translator/stm/src_stm/stm/nursery.c
--- a/rpython/translator/stm/src_stm/stm/nursery.c
+++ b/rpython/translator/stm/src_stm/stm/nursery.c
@@ -196,9 +196,7 @@
content); or add the object to 'large_overflow_objects'.
*/
if (STM_PSEGMENT->minor_collect_will_commit_now) {
- mutex_pages_lock();
synchronize_object_now(obj);
- mutex_pages_unlock();
}
else
LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
@@ -234,20 +232,13 @@
/* free any object left from 'young_outside_nursery' */
if (!tree_is_cleared(pseg->young_outside_nursery)) {
- bool locked = false;
wlog_t *item;
+
TREE_LOOP_FORWARD(*pseg->young_outside_nursery, item) {
assert(!_is_in_nursery((object_t *)item->addr));
- if (!locked) {
- mutex_pages_lock();
- locked = true;
- }
_stm_large_free(stm_object_pages + item->addr);
} TREE_LOOP_END;
- if (locked)
- mutex_pages_unlock();
-
tree_clear(pseg->young_outside_nursery);
}
diff --git a/rpython/translator/stm/src_stm/stm/pages.c b/rpython/translator/stm/src_stm/stm/pages.c
--- a/rpython/translator/stm/src_stm/stm/pages.c
+++ b/rpython/translator/stm/src_stm/stm/pages.c
@@ -6,16 +6,12 @@
/************************************************************/
-static union {
- struct {
- uint8_t mutex_pages;
- volatile bool major_collection_requested;
- uint64_t total_allocated; /* keep track of how much memory we're
- using, ignoring nurseries */
- uint64_t total_allocated_bound;
- };
- char reserved[64];
-} pages_ctl __attribute__((aligned(64)));
+struct {
+ volatile bool major_collection_requested;
+ uint64_t total_allocated; /* keep track of how much memory we're
+ using, ignoring nurseries */
+ uint64_t total_allocated_bound;
+} pages_ctl;
static void setup_pages(void)
@@ -29,37 +25,15 @@
memset(pages_privatized, 0, sizeof(pages_privatized));
}
-static void mutex_pages_lock(void)
-{
- if (__sync_lock_test_and_set(&pages_ctl.mutex_pages, 1) == 0)
- return;
-
- int previous = change_timing_state(STM_TIME_SPIN_LOOP);
- while (__sync_lock_test_and_set(&pages_ctl.mutex_pages, 1) != 0) {
- spin_loop();
- }
- change_timing_state(previous);
-}
-
-static void mutex_pages_unlock(void)
-{
- __sync_lock_release(&pages_ctl.mutex_pages);
-}
-
-static bool _has_mutex_pages(void)
-{
- return pages_ctl.mutex_pages != 0;
-}
-
static uint64_t increment_total_allocated(ssize_t add_or_remove)
{
- assert(_has_mutex_pages());
- pages_ctl.total_allocated += add_or_remove;
+ uint64_t ta = __sync_add_and_fetch(&pages_ctl.total_allocated,
+ add_or_remove);
- if (pages_ctl.total_allocated >= pages_ctl.total_allocated_bound)
+ if (ta >= pages_ctl.total_allocated_bound)
pages_ctl.major_collection_requested = true;
- return pages_ctl.total_allocated;
+ return ta;
}
static bool is_major_collection_requested(void)
@@ -118,10 +92,12 @@
/* call remap_file_pages() to make all pages in the range(pagenum,
pagenum+count) refer to the same physical range of pages from
segment 0. */
- uintptr_t i;
- assert(_has_mutex_pages());
+ dprintf(("pages_initialize_shared: 0x%ld - 0x%ld\n", pagenum,
+ pagenum + count));
+ assert(pagenum < NB_PAGES);
if (count == 0)
return;
+ uintptr_t i;
for (i = 1; i <= NB_SEGMENTS; i++) {
char *segment_base = get_segment_base(i);
d_remap_file_pages(segment_base + pagenum * 4096UL,
@@ -131,14 +107,20 @@
static void page_privatize(uintptr_t pagenum)
{
- if (is_private_page(STM_SEGMENT->segment_num, pagenum)) {
- /* the page is already privatized */
+ /* check this thread's 'pages_privatized' bit */
+ uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
+ struct page_shared_s *ps = &pages_privatized[pagenum - PAGE_FLAG_START];
+ if (ps->by_segment & bitmask) {
+ /* the page is already privatized; nothing to do */
return;
}
- /* lock, to prevent concurrent threads from looking up this thread's
- 'pages_privatized' bits in parallel */
- mutex_pages_lock();
+#ifndef NDEBUG
+ spinlock_acquire(lock_pages_privatizing[STM_SEGMENT->segment_num]);
+#endif
+
+ /* add this thread's 'pages_privatized' bit */
+ __sync_fetch_and_add(&ps->by_segment, bitmask);
/* "unmaps" the page to make the address space location correspond
again to its underlying file offset (XXX later we should again
@@ -152,11 +134,9 @@
/* copy the content from the shared (segment 0) source */
pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
- /* add this thread's 'pages_privatized' bit */
- uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
- pages_privatized[pagenum - PAGE_FLAG_START].by_segment |= bitmask;
-
- mutex_pages_unlock();
+#ifndef NDEBUG
+ spinlock_release(lock_pages_privatizing[STM_SEGMENT->segment_num]);
+#endif
}
static void _page_do_reshare(long segnum, uintptr_t pagenum)
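
With the pages mutex gone, increment_total_allocated() above becomes a
single atomic read-modify-write: __sync_add_and_fetch() returns the value
after the update, and that snapshot is compared against the bound to
decide whether to request a major collection.  A minimal sketch of the
same lock-free accounting (names invented for the example):

    #include <stdbool.h>
    #include <stdint.h>
    #include <sys/types.h>      /* ssize_t */

    static uint64_t total_allocated;
    static uint64_t total_allocated_bound = 64UL * 1024 * 1024;
    static volatile bool collection_requested;

    uint64_t account_allocation(ssize_t add_or_remove)
    {
        /* atomic add; 'ta' is the value after this update, so the bound
           check uses a consistent snapshot even when several threads
           allocate and free concurrently */
        uint64_t ta = __sync_add_and_fetch(&total_allocated,
                                           (uint64_t)add_or_remove);
        if (ta >= total_allocated_bound)
            collection_requested = true;
        return ta;
    }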
diff --git a/rpython/translator/stm/src_stm/stm/pages.h b/rpython/translator/stm/src_stm/stm/pages.h
--- a/rpython/translator/stm/src_stm/stm/pages.h
+++ b/rpython/translator/stm/src_stm/stm/pages.h
@@ -35,6 +35,20 @@
};
static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
+/* Rules for concurrent access to this array, possibly with is_private_page():
+
+ - we clear bits only during major collection, when all threads are
+ synchronized anyway
+
+ - we set only the bit corresponding to our segment number, using
+ an atomic addition; and we do it _before_ we actually make the
+ page private.
+
+ - concurrently, other threads checking the bits might (rarely)
+ get the answer 'true' to is_private_page() even though it is not
+ actually private yet. This inconsistency is in the direction
+ that we want for synchronize_object_now().
+*/
static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
static void page_privatize(uintptr_t pagenum);
@@ -42,10 +56,6 @@
static void _page_do_reshare(long segnum, uintptr_t pagenum);
static void pages_setup_readmarkers_for_nursery(void);
-/* Note: don't ever do "mutex_pages_lock(); mutex_lock()" in that order */
-static void mutex_pages_lock(void);
-static void mutex_pages_unlock(void);
-static bool _has_mutex_pages(void) __attribute__((unused));
static uint64_t increment_total_allocated(ssize_t add_or_remove);
static bool is_major_collection_requested(void);
static void force_major_collection_request(void);
@@ -64,4 +74,6 @@
page_reshare(pagenum);
}
-void _stm_mutex_pages_lock(void);
+#ifndef NDEBUG
+static char lock_pages_privatizing[NB_SEGMENTS + 1] = { 0 };
+#endif
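
The three rules above fit in a very small sketch: the owning segment
publishes its bit with an atomic add (equivalent to an atomic "or" here,
because that bit is known to still be zero) before it actually copies the
page, and readers accept that is_private_page() may occasionally report
'true' slightly early.  All names below are invented for the example:

    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t by_segment_bits;      /* one bit per segment */

    /* called only by the segment that owns 'segment_num', and only
       before it makes the page private */
    void mark_private(long segment_num)
    {
        uint64_t bitmask = 1UL << (segment_num - 1);
        __sync_fetch_and_add(&by_segment_bits, bitmask);
    }

    /* may be called concurrently by any thread; a spurious 'true' for
       a page still being privatized is acceptable, a missed 'true'
       after privatization is not */
    bool is_private(long segment_num)
    {
        return (by_segment_bits & (1UL << (segment_num - 1))) != 0;
    }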
diff --git a/rpython/translator/stm/src_stm/stm/timing.c b/rpython/translator/stm/src_stm/stm/timing.c
--- a/rpython/translator/stm/src_stm/stm/timing.c
+++ b/rpython/translator/stm/src_stm/stm/timing.c
@@ -56,7 +56,6 @@
"minor gc",
"major gc",
"sync pause",
- "spin loop",
};
void stm_flush_timing(stm_thread_local_t *tl, int verbose)
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -71,7 +71,6 @@
STM_TIME_MINOR_GC,
STM_TIME_MAJOR_GC,
STM_TIME_SYNC_PAUSE,
- STM_TIME_SPIN_LOOP,
_STM_TIME_N
};
@@ -134,8 +133,6 @@
object_t *_stm_enum_modified_old_objects(long index);
object_t *_stm_enum_objects_pointing_to_nursery(long index);
uint64_t _stm_total_allocated(void);
-void _stm_mutex_pages_lock(void);
-void _stm_mutex_pages_unlock(void);
#endif
#define _STM_GCFLAG_WRITE_BARRIER 0x01
diff --git a/rpython/translator/stm/src_stm/stmgcintf.h b/rpython/translator/stm/src_stm/stmgcintf.h
--- a/rpython/translator/stm/src_stm/stmgcintf.h
+++ b/rpython/translator/stm/src_stm/stmgcintf.h
@@ -6,7 +6,7 @@
#include <errno.h>
#include "stmgc.h"
-#include "stm/atomic.h" /* for spin_loop() and write_fence() */
+#include "stm/atomic.h" /* for spin_loop(), write_fence(), spinlock_xxx() */
extern __thread struct stm_thread_local_s stm_thread_local;
extern __thread long pypy_stm_ready_atomic;
@@ -101,25 +101,4 @@
}
-#if 0 /* fprinting versions */
-# define spinlock_acquire(lock, targetvalue) \
- do { if (__sync_bool_compare_and_swap(&(lock), 0, (targetvalue))) { \
- dprintf(("<<< locked %d\n", (int)targetvalue)); \
- break; \
- } \
- do { spin_loop(); } while (lock); \
- } while (1)
-# define spinlock_release(lock) \
- do { dprintf(("unlocked >>>\n")); write_fence(); \
- assert((lock) != 0); (lock) = 0; } while (0)
-#else
-# define spinlock_acquire(lock, targetvalue) \
- do { if (__sync_bool_compare_and_swap(&(lock), 0, (targetvalue))) break; \
- do { spin_loop(); } while (lock); \
- } while (1)
-# define spinlock_release(lock) \
- do { write_fence(); assert((lock) != 0); (lock) = 0; } while (0)
-#endif
-
-
#endif /* _RPY_STMGCINTF_H */