[pypy-commit] stmgc default: Give up trying to understand what was wrong with the lock-free
arigo
noreply at buildbot.pypy.org
Tue Apr 22 20:46:39 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r1177:e4fa937a6860
Date: 2014-04-22 20:46 +0200
http://bitbucket.org/pypy/stmgc/changeset/e4fa937a6860/
Log: Give up trying to understand what was wrong with the lock-free
model, and reintroduce locks (in a way that shouldn't have a
noticeable cost, hopefully).
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -14,13 +14,10 @@
#define EVENTUALLY(condition) \
{ \
if (!(condition)) { \
- int _i; \
- for (_i = 1; _i <= NB_SEGMENTS; _i++) \
- spinlock_acquire(lock_pages_privatizing[_i]); \
+ acquire_privatization_lock(); \
if (!(condition)) \
stm_fatalerror("fails: " #condition); \
- for (_i = 1; _i <= NB_SEGMENTS; _i++) \
- spinlock_release(lock_pages_privatizing[_i]); \
+ release_privatization_lock(); \
} \
}
#endif
@@ -337,9 +334,12 @@
/* Copy around the version of 'obj' that lives in our own segment.
It is first copied into the shared pages, and then into other
segments' own private pages.
+
+ Must be called with the privatization lock acquired.
*/
assert(!_is_young(obj));
assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+ assert(STM_PSEGMENT->privatization_lock == 1);
uintptr_t start = (uintptr_t)obj;
uintptr_t first_page = start / 4096UL;
@@ -381,26 +381,9 @@
memcpy(dst, src, copy_size);
}
else {
- EVENTUALLY(memcmp(dst, src, copy_size) == 0); /* same page */
+ assert(memcmp(dst, src, copy_size) == 0); /* same page */
}
- /* Do a full memory barrier. We must make sure that other
- CPUs see the changes we did to the shared page ("S",
- above) before we check the other segments below with
- is_private_page(). Otherwise, we risk the following:
- this CPU writes "S" but the writes are not visible yet;
- then it checks is_private_page() and gets false, and does
- nothing more; just afterwards another CPU sets its own
- private_page bit and copies the page; but it risks doing
- so before seeing the "S" writes.
-
- XXX what is the cost of this? If it's high, then we
- should reorganize the code so that we buffer the second
- parts and do them by bunch of N, after just one call to
- __sync_synchronize()...
- */
- __sync_synchronize();
-
for (i = 1; i <= NB_SEGMENTS; i++) {
if (i == myself)
continue;
@@ -417,7 +400,7 @@
memcpy(dst, src, copy_size);
}
else {
- EVENTUALLY(!memcmp(dst, src, copy_size)); /* same page */
+ assert(!memcmp(dst, src, copy_size)); /* same page */
}
}
@@ -431,12 +414,15 @@
if (STM_PSEGMENT->large_overflow_objects == NULL)
return;
+ acquire_privatization_lock();
LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
synchronize_object_now(item));
+ release_privatization_lock();
}
static void push_modified_to_other_segments(void)
{
+ acquire_privatization_lock();
LIST_FOREACH_R(
STM_PSEGMENT->modified_old_objects,
object_t * /*item*/,
@@ -456,6 +442,7 @@
private pages as needed */
synchronize_object_now(item);
}));
+ release_privatization_lock();
list_clear(STM_PSEGMENT->modified_old_objects);
}
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -148,6 +148,12 @@
/* For sleeping contention management */
bool signal_when_done;
+ /* This lock is acquired when that segment calls synchronize_object_now.
+ In the rare event of a page_privatize(), the latter will acquire
+ the locks of all segments. Otherwise, for the common case,
+ it's cheap. */
+ uint8_t privatization_lock;
+
/* In case of abort, we restore the 'shadowstack' field and the
'thread_local_obj' field. */
struct stm_shadowentry_s *shadowstack_at_start_of_transaction;
@@ -226,3 +232,17 @@
static void copy_object_to_shared(object_t *obj, int source_segment_num);
static void synchronize_object_now(object_t *obj);
+
+static inline void acquire_privatization_lock(void)
+{
+ uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+ &STM_PSEGMENT->privatization_lock);
+ spinlock_acquire(*lock);
+}
+
+static inline void release_privatization_lock(void)
+{
+ uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+ &STM_PSEGMENT->privatization_lock);
+ spinlock_release(*lock);
+}
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -92,17 +92,20 @@
/* uncommon case: need to initialize some more pages */
spinlock_acquire(lock_growth_large);
- if (addr + size > uninitialized_page_start) {
+ char *start = uninitialized_page_start;
+ if (addr + size > start) {
uintptr_t npages;
- npages = (addr + size - uninitialized_page_start) / 4096UL;
+ npages = (addr + size - start) / 4096UL;
npages += GCPAGE_NUM_PAGES;
- if (uninitialized_page_stop - uninitialized_page_start <
- npages * 4096UL) {
+ if (uninitialized_page_stop - start < npages * 4096UL) {
stm_fatalerror("out of memory!"); /* XXX */
}
- setup_N_pages(uninitialized_page_start, npages);
- __sync_synchronize();
- uninitialized_page_start += npages * 4096UL;
+ setup_N_pages(start, npages);
+ if (!__sync_bool_compare_and_swap(&uninitialized_page_start,
+ start,
+ start + npages * 4096UL)) {
+ stm_fatalerror("uninitialized_page_start changed?");
+ }
}
spinlock_release(lock_growth_large);
return addr;
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -217,7 +217,9 @@
content); or add the object to 'large_overflow_objects'.
*/
if (STM_PSEGMENT->minor_collect_will_commit_now) {
+ acquire_privatization_lock();
synchronize_object_now(obj);
+ release_privatization_lock();
}
else
LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -108,18 +108,20 @@
{
/* check this thread's 'pages_privatized' bit */
uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
- struct page_shared_s *ps = &pages_privatized[pagenum - PAGE_FLAG_START];
+ volatile struct page_shared_s *ps = (volatile struct page_shared_s *)
+ &pages_privatized[pagenum - PAGE_FLAG_START];
if (ps->by_segment & bitmask) {
/* the page is already privatized; nothing to do */
return;
}
-#ifndef NDEBUG
- spinlock_acquire(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+ long i;
+ for (i = 1; i <= NB_SEGMENTS; i++) {
+ spinlock_acquire(get_priv_segment(i)->privatization_lock);
+ }
/* add this thread's 'pages_privatized' bit */
- __sync_fetch_and_add(&ps->by_segment, bitmask);
+ ps->by_segment |= bitmask;
/* "unmaps" the page to make the address space location correspond
again to its underlying file offset (XXX later we should again
@@ -133,9 +135,9 @@
/* copy the content from the shared (segment 0) source */
pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
-#ifndef NDEBUG
- spinlock_release(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+ for (i = NB_SEGMENTS; i >= 1; i--) {
+ spinlock_release(get_priv_segment(i)->privatization_lock);
+ }
}
static void _page_do_reshare(long segnum, uintptr_t pagenum)
diff --git a/c7/stm/pages.h b/c7/stm/pages.h
--- a/c7/stm/pages.h
+++ b/c7/stm/pages.h
@@ -34,20 +34,6 @@
};
static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
-/* Rules for concurrent access to this array, possibly with is_private_page():
-
- - we clear bits only during major collection, when all threads are
- synchronized anyway
-
- - we set only the bit corresponding to our segment number, using
- an atomic addition; and we do it _before_ we actually make the
- page private.
-
- - concurrently, other threads checking the bits might (rarely)
- get the answer 'true' to is_private_page() even though it is not
- actually private yet. This inconsistency is in the direction
- that we want for synchronize_object_now().
-*/
static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
static void page_privatize(uintptr_t pagenum);
@@ -72,7 +58,3 @@
if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
page_reshare(pagenum);
}
-
-#ifndef NDEBUG
-static char lock_pages_privatizing[NB_SEGMENTS + 1] = { 0 };
-#endif
More information about the pypy-commit
mailing list