[pypy-commit] stmgc default: Give up trying to understand what was wrong with the lock-free

arigo noreply at buildbot.pypy.org
Tue Apr 22 20:46:39 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r1177:e4fa937a6860
Date: 2014-04-22 20:46 +0200
http://bitbucket.org/pypy/stmgc/changeset/e4fa937a6860/

Log:	Give up trying to understand what was wrong with the lock-free
	model, and reintroduce locks (in a way that shouldn't have a
	noticeable cost, hopefully).
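
The scheme reintroduced below gives each segment a one-byte spinlock: the
common path (a segment pushing its own objects) takes only its own lock, so
the uncontended cost is a single atomic exchange.  A minimal stand-alone
sketch of that fast path, assuming GCC's __sync builtins; stmgc's actual
spinlock_acquire/spinlock_release helpers are not shown in this diff and may
be implemented differently:

    #include <stdint.h>

    #define NB_SEGMENTS 4                      /* placeholder value */

    /* one lock byte per segment, like the 'privatization_lock' field below */
    static uint8_t privatization_lock[NB_SEGMENTS + 1];

    static void spin_acquire(uint8_t *lock)
    {
        while (__sync_lock_test_and_set(lock, 1))   /* atomic xchg, acquire */
            while (*(volatile uint8_t *)lock)       /* spin with plain reads */
                ;
    }

    static void spin_release(uint8_t *lock)
    {
        __sync_lock_release(lock);                  /* store 0, release */
    }

    /* common case: a segment brackets its work with only its own lock,
       so unrelated segments never contend with each other */
    static void push_own_objects(int my_segment)
    {
        spin_acquire(&privatization_lock[my_segment]);
        /* ... synchronize_object_now() on each object ... */
        spin_release(&privatization_lock[my_segment]);
    }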

diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -14,13 +14,10 @@
 #define EVENTUALLY(condition)                                   \
     {                                                           \
         if (!(condition)) {                                     \
-            int _i;                                             \
-            for (_i = 1; _i <= NB_SEGMENTS; _i++)               \
-                spinlock_acquire(lock_pages_privatizing[_i]);   \
+            acquire_privatization_lock();                       \
             if (!(condition))                                   \
                 stm_fatalerror("fails: " #condition);           \
-            for (_i = 1; _i <= NB_SEGMENTS; _i++)               \
-                spinlock_release(lock_pages_privatizing[_i]);   \
+            release_privatization_lock();                       \
         }                                                       \
     }
 #endif
@@ -337,9 +334,12 @@
     /* Copy around the version of 'obj' that lives in our own segment.
        It is first copied into the shared pages, and then into other
        segments' own private pages.
+
+       Must be called with the privatization lock acquired.
     */
     assert(!_is_young(obj));
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(STM_PSEGMENT->privatization_lock == 1);
 
     uintptr_t start = (uintptr_t)obj;
     uintptr_t first_page = start / 4096UL;
@@ -381,26 +381,9 @@
                     memcpy(dst, src, copy_size);
             }
             else {
-                EVENTUALLY(memcmp(dst, src, copy_size) == 0);  /* same page */
+                assert(memcmp(dst, src, copy_size) == 0);  /* same page */
             }
 
-            /* Do a full memory barrier.  We must make sure that other
-               CPUs see the changes we did to the shared page ("S",
-               above) before we check the other segments below with
-               is_private_page().  Otherwise, we risk the following:
-               this CPU writes "S" but the writes are not visible yet;
-               then it checks is_private_page() and gets false, and does
-               nothing more; just afterwards another CPU sets its own
-               private_page bit and copies the page; but it risks doing
-               so before seeing the "S" writes.
-
-               XXX what is the cost of this?  If it's high, then we
-               should reorganize the code so that we buffer the second
-               parts and do them by bunch of N, after just one call to
-               __sync_synchronize()...
-            */
-            __sync_synchronize();
-
             for (i = 1; i <= NB_SEGMENTS; i++) {
                 if (i == myself)
                     continue;
@@ -417,7 +400,7 @@
                         memcpy(dst, src, copy_size);
                 }
                 else {
-                    EVENTUALLY(!memcmp(dst, src, copy_size));  /* same page */
+                    assert(!memcmp(dst, src, copy_size));  /* same page */
                 }
             }
 
@@ -431,12 +414,15 @@
     if (STM_PSEGMENT->large_overflow_objects == NULL)
         return;
 
+    acquire_privatization_lock();
     LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
                    synchronize_object_now(item));
+    release_privatization_lock();
 }
 
 static void push_modified_to_other_segments(void)
 {
+    acquire_privatization_lock();
     LIST_FOREACH_R(
         STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
@@ -456,6 +442,7 @@
                private pages as needed */
             synchronize_object_now(item);
         }));
+    release_privatization_lock();
 
     list_clear(STM_PSEGMENT->modified_old_objects);
 }
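
Both call sites above take the lock once around the whole LIST_FOREACH_R
loop, so one acquire/release pair covers an entire batch of objects rather
than being paid per object.  A hedged sketch of that amortization pattern,
with stand-in declarations for the stmgc types and helpers:

    typedef struct object_s object_t;          /* stand-in declarations */
    void acquire_privatization_lock(void);
    void release_privatization_lock(void);
    void synchronize_object_now(object_t *obj);

    static void push_batch(object_t **items, long n)
    {
        long i;
        acquire_privatization_lock();          /* one acquire per batch */
        for (i = 0; i < n; i++)
            synchronize_object_now(items[i]);  /* lock already held */
        release_privatization_lock();          /* one release per batch */
    }
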
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -148,6 +148,12 @@
     /* For sleeping contention management */
     bool signal_when_done;
 
+    /* This lock is acquired when that segment calls synchronize_object_now.
+       On the rare event of a page_privatize(), the latter will acquire
+       all the locks in all segments.  Otherwise, for the common case,
+       it's cheap. */
+    uint8_t privatization_lock;
+
     /* In case of abort, we restore the 'shadowstack' field and the
        'thread_local_obj' field. */
     struct stm_shadowentry_s *shadowstack_at_start_of_transaction;
@@ -226,3 +232,17 @@
 
 static void copy_object_to_shared(object_t *obj, int source_segment_num);
 static void synchronize_object_now(object_t *obj);
+
+static inline void acquire_privatization_lock(void)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                            &STM_PSEGMENT->privatization_lock);
+    spinlock_acquire(*lock);
+}
+
+static inline void release_privatization_lock(void)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                            &STM_PSEGMENT->privatization_lock);
+    spinlock_release(*lock);
+}
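
Every caller of synchronize_object_now() in this diff brackets it with these
two helpers; the REAL_ADDRESS() translation makes the thread spin on the
lock byte at its process-wide address, the same byte that page_privatize()
reaches through get_priv_segment(i) in pages.c below.  The recurring usage
pattern, as in core.c above and nursery.c below:

    acquire_privatization_lock();
    synchronize_object_now(obj);   /* asserts privatization_lock == 1 */
    release_privatization_lock();
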
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -92,17 +92,20 @@
     /* uncommon case: need to initialize some more pages */
     spinlock_acquire(lock_growth_large);
 
-    if (addr + size > uninitialized_page_start) {
+    char *start = uninitialized_page_start;
+    if (addr + size > start) {
         uintptr_t npages;
-        npages = (addr + size - uninitialized_page_start) / 4096UL;
+        npages = (addr + size - start) / 4096UL;
         npages += GCPAGE_NUM_PAGES;
-        if (uninitialized_page_stop - uninitialized_page_start <
-                npages * 4096UL) {
+        if (uninitialized_page_stop - start < npages * 4096UL) {
             stm_fatalerror("out of memory!");   /* XXX */
         }
-        setup_N_pages(uninitialized_page_start, npages);
-        __sync_synchronize();
-        uninitialized_page_start += npages * 4096UL;
+        setup_N_pages(start, npages);
+        if (!__sync_bool_compare_and_swap(&uninitialized_page_start,
+                                          start,
+                                          start + npages * 4096UL)) {
+            stm_fatalerror("uninitialized_page_start changed?");
+        }
     }
     spinlock_release(lock_growth_large);
     return addr;
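
The compare-and-swap that replaces the bare __sync_synchronize() publishes
the new uninitialized_page_start atomically and doubles as a sanity check:
lock_growth_large is held here, so no other thread may legitimately move the
pointer, and a CAS failure signals corruption rather than contention.  A
minimal sketch of the idiom (the helper name is illustrative):

    #include <stdint.h>
    #include <stdlib.h>

    static char *uninitialized_page_start;   /* protected by lock_growth_large */

    static void publish_growth(char *start, uintptr_t npages)
    {
        /* atomic publish + check: the slot must still hold the old value */
        if (!__sync_bool_compare_and_swap(&uninitialized_page_start,
                                          start, start + npages * 4096UL))
            abort();   /* impossible unless the lock was violated */
    }
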
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -217,7 +217,9 @@
                content); or add the object to 'large_overflow_objects'.
             */
             if (STM_PSEGMENT->minor_collect_will_commit_now) {
+                acquire_privatization_lock();
                 synchronize_object_now(obj);
+                release_privatization_lock();
             }
             else
                 LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -108,18 +108,20 @@
 {
     /* check this thread's 'pages_privatized' bit */
     uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
-    struct page_shared_s *ps = &pages_privatized[pagenum - PAGE_FLAG_START];
+    volatile struct page_shared_s *ps = (volatile struct page_shared_s *)
+        &pages_privatized[pagenum - PAGE_FLAG_START];
     if (ps->by_segment & bitmask) {
         /* the page is already privatized; nothing to do */
         return;
     }
 
-#ifndef NDEBUG
-    spinlock_acquire(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        spinlock_acquire(get_priv_segment(i)->privatization_lock);
+    }
 
     /* add this thread's 'pages_privatized' bit */
-    __sync_fetch_and_add(&ps->by_segment, bitmask);
+    ps->by_segment |= bitmask;
 
     /* "unmaps" the page to make the address space location correspond
        again to its underlying file offset (XXX later we should again
@@ -133,9 +135,9 @@
     /* copy the content from the shared (segment 0) source */
     pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
 
-#ifndef NDEBUG
-    spinlock_release(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+    for (i = NB_SEGMENTS; i >= 1; i--) {
+        spinlock_release(get_priv_segment(i)->privatization_lock);
+    }
 }
 
 static void _page_do_reshare(long segnum, uintptr_t pagenum)
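
page_privatize() is the rare writer that must exclude every segment at once.
If two threads privatize pages concurrently, both acquire the segment locks
in the same ascending order, so neither can end up waiting on a lock with a
lower index than one it already holds, and no cycle can form; releasing in
descending order appears to be a convention rather than a correctness
requirement.  A self-contained sketch of the pattern (names are stand-ins
for the real per-segment field and helpers):

    #include <stdint.h>

    #define NB_SEGMENTS 4                      /* placeholder value */
    static uint8_t seg_lock[NB_SEGMENTS + 1];  /* stand-in per-segment locks */

    static void spin_acquire(uint8_t *lock)
    {
        while (__sync_lock_test_and_set(lock, 1))
            while (*(volatile uint8_t *)lock)
                ;
    }

    static void spin_release(uint8_t *lock)
    {
        __sync_lock_release(lock);
    }

    static void exclude_all_segments(void)
    {
        long i;
        for (i = 1; i <= NB_SEGMENTS; i++)     /* one global order for all */
            spin_acquire(&seg_lock[i]);
        /* ... set the pages_privatized bit and remap the page ... */
        for (i = NB_SEGMENTS; i >= 1; i--)     /* reverse release, by convention */
            spin_release(&seg_lock[i]);
    }
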
diff --git a/c7/stm/pages.h b/c7/stm/pages.h
--- a/c7/stm/pages.h
+++ b/c7/stm/pages.h
@@ -34,20 +34,6 @@
 };
 
 static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
-/* Rules for concurrent access to this array, possibly with is_private_page():
-
-   - we clear bits only during major collection, when all threads are
-     synchronized anyway
-
-   - we set only the bit corresponding to our segment number, using
-     an atomic addition; and we do it _before_ we actually make the
-     page private.
-
-   - concurrently, other threads checking the bits might (rarely)
-     get the answer 'true' to is_private_page() even though it is not
-     actually private yet.  This inconsistency is in the direction
-     that we want for synchronize_object_now().
-*/
 
 static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
 static void page_privatize(uintptr_t pagenum);
@@ -72,7 +58,3 @@
     if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
         page_reshare(pagenum);
 }
-
-#ifndef NDEBUG
-static char lock_pages_privatizing[NB_SEGMENTS + 1] = { 0 };
-#endif

