[pypy-commit] pypy stmgc-c8: import stmgc/f0d995d5609d, branch c8-locking


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c8
Changeset: r77743:a3a33fcdc546
Date: 2015-06-01 15:58 +0100
http://bitbucket.org/pypy/pypy/changeset/a3a33fcdc546/

Log:	import stmgc/f0d995d5609d, branch c8-locking
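
For context: this import replaces the per-segment spinlock 'modification_lock' with one pthread read-write lock per segment (new file stm/locks.h below). Validation now takes its own segment's lock in write mode and the locks of the segments it copies from in read mode, all under the privatization lock. A minimal sketch of the new call pattern, paraphrasing the _stm_validate() hunk below rather than quoting it (the wrapper name is invented):

    /* read-lock the segments we import committed changes from, write-lock
       our own segment; privatization lock first, as required by the
       lock-ordering comment in stm/locks.h */
    static void validate_locking_sketch(void)
    {
        long my_segnum = STM_SEGMENT->segment_num;
        uint64_t segments_to_lock = 0;    /* bit i set => read-lock segment i */

        /* ... walk the commit log, setting bits for segments to copy from ... */

        acquire_privatization_lock(my_segnum);
        acquire_modification_lock_set(segments_to_lock, my_segnum);

        /* ... import objects, replay or discard backup copies ... */

        release_modification_lock_set(segments_to_lock, my_segnum);
        release_privatization_lock(my_segnum);
    }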

diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-e55658d12179
+f0d995d5609d
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -50,8 +50,8 @@
     char *src_segment_base = (from_segnum >= 0 ? get_segment_base(from_segnum)
                                                : NULL);
 
-    assert(IMPLY(from_segnum >= 0, get_priv_segment(from_segnum)->modification_lock));
-    assert(STM_PSEGMENT->modification_lock);
+    assert(IMPLY(from_segnum >= 0, modification_lock_check_rdlock(from_segnum)));
+    assert(modification_lock_check_wrlock(STM_SEGMENT->segment_num));
 
     long my_segnum = STM_SEGMENT->segment_num;
     DEBUG_EXPECT_SEGFAULT(false);
@@ -131,7 +131,7 @@
                            struct stm_commit_log_entry_s *from,
                            struct stm_commit_log_entry_s *to)
 {
-    assert(STM_PSEGMENT->modification_lock);
+    assert(modification_lock_check_wrlock(STM_SEGMENT->segment_num));
     assert(from->rev_num >= to->rev_num);
     /* walk BACKWARDS the commit log and update the page 'pagenum',
        initially at revision 'from', until we reach the revision 'to'. */
@@ -199,8 +199,8 @@
 
     /* before copying anything, acquire modification locks from our and
        the other segment */
-    uint64_t to_lock = (1UL << copy_from_segnum)| (1UL << my_segnum);
-    acquire_modification_lock_set(to_lock);
+    uint64_t to_lock = (1UL << copy_from_segnum);
+    acquire_modification_lock_set(to_lock, my_segnum);
     pagecopy(get_virtual_page(my_segnum, pagenum),
              get_virtual_page(copy_from_segnum, pagenum));
 
@@ -223,7 +223,7 @@
     if (src_version->rev_num > target_version->rev_num)
         go_to_the_past(pagenum, src_version, target_version);
 
-    release_modification_lock_set(to_lock);
+    release_modification_lock_set(to_lock, my_segnum);
     release_all_privatization_locks();
 }
 
@@ -308,7 +308,7 @@
 
 static void reset_modified_from_backup_copies(int segment_num);  /* forward */
 
-static bool _stm_validate()
+static bool _stm_validate(void)
 {
     /* returns true if we reached a valid state, or false if
        we need to abort now */
@@ -357,7 +357,7 @@
         }
 
         /* Find the set of segments we need to copy from and lock them: */
-        uint64_t segments_to_lock = 1UL << my_segnum;
+        uint64_t segments_to_lock = 0;
         cl = first_cl;
         while ((next_cl = cl->next) != NULL) {
             if (next_cl == INEV_RUNNING) {
@@ -375,8 +375,8 @@
 
         /* HERE */
 
-        acquire_privatization_lock(STM_SEGMENT->segment_num);
-        acquire_modification_lock_set(segments_to_lock);
+        acquire_privatization_lock(my_segnum);
+        acquire_modification_lock_set(segments_to_lock, my_segnum);
 
 
         /* import objects from first_cl to last_cl: */
@@ -466,8 +466,8 @@
         }
 
         /* done with modifications */
-        release_modification_lock_set(segments_to_lock);
-        release_privatization_lock(STM_SEGMENT->segment_num);
+        release_modification_lock_set(segments_to_lock, my_segnum);
+        release_privatization_lock(my_segnum);
     }
 
     return !needs_abort;
@@ -545,7 +545,7 @@
                time" as the attach to commit log. Otherwise, another thread may
                see the new CL entry, import it, look for backup copies in this
                segment and find the old backup copies! */
-            acquire_modification_lock(STM_SEGMENT->segment_num);
+            acquire_modification_lock_wr(STM_SEGMENT->segment_num);
         }
 
         /* try to attach to commit log: */
@@ -559,7 +559,7 @@
         }
 
         if (is_commit) {
-            release_modification_lock(STM_SEGMENT->segment_num);
+            release_modification_lock_wr(STM_SEGMENT->segment_num);
             /* XXX: unfortunately, if we failed to attach our CL entry,
                we have to re-add the WB_EXECUTED flags before we try to
                validate again because of said condition (s.a) */
@@ -596,7 +596,7 @@
 
         list_clear(STM_PSEGMENT->modified_old_objects);
         STM_PSEGMENT->last_commit_log_entry = new;
-        release_modification_lock(STM_SEGMENT->segment_num);
+        release_modification_lock_wr(STM_SEGMENT->segment_num);
     }
 }
 
@@ -692,7 +692,7 @@
         increment_total_allocated(slice_sz);
         memcpy(bk_slice, realobj + slice_off, slice_sz);
 
-        acquire_modification_lock(STM_SEGMENT->segment_num);
+        acquire_modification_lock_wr(STM_SEGMENT->segment_num);
         /* !! follows layout of "struct stm_undo_s" !! */
         STM_PSEGMENT->modified_old_objects = list_append3(
             STM_PSEGMENT->modified_old_objects,
@@ -700,7 +700,7 @@
             (uintptr_t)bk_slice,  /* bk_addr */
             NEW_SLICE(slice_off, slice_sz));
         dprintf(("> append slice %p, off=%lu, sz=%lu\n", bk_slice, slice_off, slice_sz));
-        release_modification_lock(STM_SEGMENT->segment_num);
+        release_modification_lock_wr(STM_SEGMENT->segment_num);
 
         slice_off += slice_sz;
     }
@@ -896,6 +896,8 @@
 
 static void touch_all_pages_of_obj(object_t *obj, size_t obj_size)
 {
+    /* XXX should it be simpler, just really trying to read a dummy
+       byte in each page? */
     int my_segnum = STM_SEGMENT->segment_num;
     uintptr_t end_page, first_page = ((uintptr_t)obj) / 4096UL;
 
@@ -1345,7 +1347,7 @@
 #pragma push_macro("STM_SEGMENT")
 #undef STM_PSEGMENT
 #undef STM_SEGMENT
-    assert(get_priv_segment(segment_num)->modification_lock);
+    assert(modification_lock_check_wrlock(segment_num));
 
     struct stm_priv_segment_info_s *pseg = get_priv_segment(segment_num);
     struct list_s *list = pseg->modified_old_objects;
@@ -1407,9 +1409,9 @@
             _reset_object_cards(pseg, item, CARD_CLEAR, false, false);
         });
 
-    acquire_modification_lock(segment_num);
+    acquire_modification_lock_wr(segment_num);
     reset_modified_from_backup_copies(segment_num);
-    release_modification_lock(segment_num);
+    release_modification_lock_wr(segment_num);
     _verify_cards_cleared_in_all_lists(pseg);
 
     stm_thread_local_t *tl = pseg->pub.running_thread;
diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h
--- a/rpython/translator/stm/src_stm/stm/core.h
+++ b/rpython/translator/stm/src_stm/stm/core.h
@@ -74,11 +74,6 @@
 struct stm_priv_segment_info_s {
     struct stm_segment_info_s pub;
 
-    /* lock protecting from concurrent modification of
-       'modified_old_objects', page-revision-changes, ...
-       Always acquired in global order of segments to avoid deadlocks. */
-    uint8_t modification_lock;
-
     /* All the old objects (older than the current transaction) that
        the current transaction attempts to modify.  This is used to
        track the STM status: these are old objects that were written
@@ -297,7 +292,7 @@
 static void synchronize_objects_flush(void);
 
 static void _signal_handler(int sig, siginfo_t *siginfo, void *context);
-static bool _stm_validate();
+static bool _stm_validate(void);
 
 static inline bool was_read_remote(char *base, object_t *obj)
 {
@@ -329,7 +324,7 @@
     spinlock_release(get_priv_segment(segnum)->privatization_lock);
 }
 
-static inline bool all_privatization_locks_acquired()
+static inline bool all_privatization_locks_acquired(void)
 {
 #ifndef NDEBUG
     long l;
@@ -343,7 +338,7 @@
 #endif
 }
 
-static inline void acquire_all_privatization_locks()
+static inline void acquire_all_privatization_locks(void)
 {
     /* XXX: don't do for the sharing seg0 */
     long l;
@@ -352,60 +347,10 @@
     }
 }
 
-static inline void release_all_privatization_locks()
+static inline void release_all_privatization_locks(void)
 {
     long l;
     for (l = NB_SEGMENTS-1; l >= 0; l--) {
         release_privatization_lock(l);
     }
 }
-
-
-
-/* Modification locks are used to prevent copying from a segment
-   where either the revision of some pages is inconsistent with the
-   rest, or the modified_old_objects list is being modified (bk_copys).
-
-   Lock ordering: acquire privatization lock around acquiring a set
-   of modification locks!
-*/
-
-static inline void acquire_modification_lock(int segnum)
-{
-    spinlock_acquire(get_priv_segment(segnum)->modification_lock);
-}
-
-static inline void release_modification_lock(int segnum)
-{
-    spinlock_release(get_priv_segment(segnum)->modification_lock);
-}
-
-static inline void acquire_modification_lock_set(uint64_t seg_set)
-{
-    assert(NB_SEGMENTS <= 64);
-    OPT_ASSERT(seg_set < (1 << NB_SEGMENTS));
-
-    /* acquire locks in global order */
-    int i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
-        if ((seg_set & (1 << i)) == 0)
-            continue;
-
-        spinlock_acquire(get_priv_segment(i)->modification_lock);
-    }
-}
-
-static inline void release_modification_lock_set(uint64_t seg_set)
-{
-    assert(NB_SEGMENTS <= 64);
-    OPT_ASSERT(seg_set < (1 << NB_SEGMENTS));
-
-    int i;
-    for (i = 0; i < NB_SEGMENTS; i++) {
-        if ((seg_set & (1 << i)) == 0)
-            continue;
-
-        assert(get_priv_segment(i)->modification_lock);
-        spinlock_release(get_priv_segment(i)->modification_lock);
-    }
-}
diff --git a/rpython/translator/stm/src_stm/stm/forksupport.c b/rpython/translator/stm/src_stm/stm/forksupport.c
--- a/rpython/translator/stm/src_stm/stm/forksupport.c
+++ b/rpython/translator/stm/src_stm/stm/forksupport.c
@@ -120,6 +120,9 @@
        just release these locks early */
     s_mutex_unlock();
 
+    /* Re-init these locks; might be needed after a fork() */
+    setup_modification_locks();
+
 
     /* Unregister all other stm_thread_local_t, mostly as a way to free
        the memory used by the shadowstacks
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -681,7 +681,7 @@
     _stm_smallmalloc_sweep();
 }
 
-static void clean_up_commit_log_entries()
+static void clean_up_commit_log_entries(void)
 {
     struct stm_commit_log_entry_s *cl, *next;
 
diff --git a/rpython/translator/stm/src_stm/stm/locks.h b/rpython/translator/stm/src_stm/stm/locks.h
new file mode 100644
--- /dev/null
+++ b/rpython/translator/stm/src_stm/stm/locks.h
@@ -0,0 +1,124 @@
+/* Imported by rpython/translator/stm/import_stmgc.py */
+/* Modification locks protect from concurrent modification of
+   'modified_old_objects', page-revision-changes, ...
+
+   Modification locks are used to prevent copying from a segment
+   where either the revision of some pages is inconsistent with the
+   rest, or the modified_old_objects list is being modified (bk_copys).
+
+   Lock ordering: acquire privatization lock around acquiring a set
+   of modification locks!
+*/
+
+typedef struct {
+    pthread_rwlock_t lock;
+#ifndef NDEBUG
+    volatile bool write_locked;
+#endif
+} modification_lock_t __attribute__((aligned(64)));
+
+static modification_lock_t _modlocks[NB_SEGMENTS - 1];
+
+
+static void setup_modification_locks(void)
+{
+    int i;
+    for (i = 1; i < NB_SEGMENTS; i++) {
+        if (pthread_rwlock_init(&_modlocks[i - 1].lock, NULL) != 0)
+            stm_fatalerror("pthread_rwlock_init: %m");
+    }
+}
+
+static void teardown_modification_locks(void)
+{
+    int i;
+    for (i = 1; i < NB_SEGMENTS; i++)
+        pthread_rwlock_destroy(&_modlocks[i - 1].lock);
+    memset(_modlocks, 0, sizeof(_modlocks));
+}
+
+
+static inline void acquire_modification_lock_wr(int segnum)
+{
+    if (UNLIKELY(pthread_rwlock_wrlock(&_modlocks[segnum - 1].lock) != 0))
+        stm_fatalerror("pthread_rwlock_wrlock: %m");
+#ifndef NDEBUG
+    assert(!_modlocks[segnum - 1].write_locked);
+    _modlocks[segnum - 1].write_locked = true;
+#endif
+}
+
+static inline void release_modification_lock_wr(int segnum)
+{
+#ifndef NDEBUG
+    assert(_modlocks[segnum - 1].write_locked);
+    _modlocks[segnum - 1].write_locked = false;
+#endif
+    if (UNLIKELY(pthread_rwlock_unlock(&_modlocks[segnum - 1].lock) != 0))
+        stm_fatalerror("pthread_rwlock_unlock(wr): %m");
+}
+
+static void acquire_modification_lock_set(uint64_t readset, int write)
+{
+    /* acquire the modification lock in 'read' mode for all segments
+       in 'readset', plus the modification lock in 'write' mode for
+       the segment number 'write'.
+    */
+    assert(NB_SEGMENTS <= 64);
+    OPT_ASSERT(readset < (1 << NB_SEGMENTS));
+    assert((readset & 1) == 0);       /* segment numbers normally start at 1 */
+    assert(0 <= write && write < NB_SEGMENTS);     /* use 0 to mean "nobody" */
+
+    /* acquire locks in global order */
+    readset |= (1UL << write);
+    int i;
+    for (i = 1; i < NB_SEGMENTS; i++) {
+        if ((readset & (1UL << i)) == 0)
+            continue;
+        if (i == write) {
+            acquire_modification_lock_wr(write);
+        }
+        else {
+            if (UNLIKELY(pthread_rwlock_rdlock(&_modlocks[i - 1].lock) != 0))
+                stm_fatalerror("pthread_rwlock_rdlock: %m");
+        }
+    }
+}
+
+static void release_modification_lock_set(uint64_t readset, int write)
+{
+    assert(NB_SEGMENTS <= 64);
+    OPT_ASSERT(readset < (1 << NB_SEGMENTS));
+
+    /* release lock order does not matter; prefer early release of
+       the write lock */
+    if (write > 0) {
+        release_modification_lock_wr(write);
+        readset &= ~(1UL << write);
+    }
+    int i;
+    for (i = 1; i < NB_SEGMENTS; i++) {
+        if ((readset & (1UL << i)) == 0)
+            continue;
+        if (UNLIKELY(pthread_rwlock_unlock(&_modlocks[i - 1].lock) != 0))
+            stm_fatalerror("pthread_rwlock_unlock(rd): %m");
+    }
+}
+
+#ifndef NDEBUG
+static bool modification_lock_check_rdlock(int segnum)
+{
+    assert(segnum > 0);
+    if (_modlocks[segnum - 1].write_locked)
+        return false;
+    if (pthread_rwlock_trywrlock(&_modlocks[segnum - 1].lock) == 0) {
+        pthread_rwlock_unlock(&_modlocks[segnum - 1].lock);
+        return false;
+    }
+    return true;
+}
+static bool modification_lock_check_wrlock(int segnum)
+{
+    return segnum == 0 || _modlocks[segnum - 1].write_locked;
+}
+#endif
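
Side note, not part of the patch: pthread has no direct "is this rwlock read-locked?" query, so modification_lock_check_rdlock() above infers it by attempting a trywrlock. The same pattern in isolation, as a self-contained illustration (the function name is not from the library):

    #include <pthread.h>
    #include <stdbool.h>

    /* true if 'lock' is currently held (by readers or a writer):
       if the write lock can be grabbed, nobody was holding it */
    static bool rwlock_seems_held(pthread_rwlock_t *lock)
    {
        if (pthread_rwlock_trywrlock(lock) == 0) {
            pthread_rwlock_unlock(lock);
            return false;
        }
        return true;
    }
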
diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c
--- a/rpython/translator/stm/src_stm/stm/misc.c
+++ b/rpython/translator/stm/src_stm/stm/misc.c
@@ -44,7 +44,7 @@
     return obj->stm_flags & _STM_GCFLAG_CARDS_SET;
 }
 
-long _stm_count_cl_entries()
+long _stm_count_cl_entries(void)
 {
     struct stm_commit_log_entry_s *cl = &commit_log_root;
 
@@ -115,7 +115,7 @@
     return cards[get_index_to_card_index(idx)].rm;
 }
 
-uint8_t _stm_get_transaction_read_version()
+uint8_t _stm_get_transaction_read_version(void)
 {
     return STM_SEGMENT->transaction_read_version;
 }
@@ -124,7 +124,7 @@
 
 static struct stm_commit_log_entry_s *_last_cl_entry;
 static long _last_cl_entry_index;
-void _stm_start_enum_last_cl_entry()
+void _stm_start_enum_last_cl_entry(void)
 {
     _last_cl_entry = &commit_log_root;
     struct stm_commit_log_entry_s *cl = &commit_log_root;
@@ -135,7 +135,7 @@
     _last_cl_entry_index = 0;
 }
 
-object_t *_stm_next_last_cl_entry()
+object_t *_stm_next_last_cl_entry(void)
 {
     if (_last_cl_entry == &commit_log_root)
         return NULL;
@@ -150,7 +150,7 @@
 }
 
 
-void _stm_smallmalloc_sweep_test()
+void _stm_smallmalloc_sweep_test(void)
 {
     acquire_all_privatization_locks();
     _stm_smallmalloc_sweep();
diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c
--- a/rpython/translator/stm/src_stm/stm/setup.c
+++ b/rpython/translator/stm/src_stm/stm/setup.c
@@ -127,6 +127,7 @@
        private range of addresses.
     */
 
+    setup_modification_locks();
     setup_sync();
     setup_nursery();
     setup_gcpage();
@@ -174,6 +175,7 @@
     teardown_gcpage();
     teardown_smallmalloc();
     teardown_pages();
+    teardown_modification_locks();
 }
 
 static void _shadowstack_trap_page(char *start, int prot)
diff --git a/rpython/translator/stm/src_stm/stmgc.c b/rpython/translator/stm/src_stm/stmgc.c
--- a/rpython/translator/stm/src_stm/stmgc.c
+++ b/rpython/translator/stm/src_stm/stmgc.c
@@ -18,6 +18,7 @@
 #include "stm/marker.h"
 #include "stm/rewind_setjmp.h"
 #include "stm/finalizer.h"
+#include "stm/locks.h"
 #include "stm/misc.c"
 #include "stm/list.c"
 #include "stm/smallmalloc.c"
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -57,13 +57,16 @@
 typedef struct stm_thread_local_s {
     /* rewind_setjmp's interface */
     rewind_jmp_thread rjthread;
+    /* every thread should handle the shadow stack itself */
     struct stm_shadowentry_s *shadowstack, *shadowstack_base;
-
     /* a generic optional thread-local object */
     object_t *thread_local_obj;
-
+    /* in case this thread runs a transaction that aborts,
+       the following raw region of memory is cleared. */
     char *mem_clear_on_abort;
     size_t mem_bytes_to_clear_on_abort;
+    /* after an abort, some details about the abort are stored there.
+       (this field is not modified on a successful commit) */
     long last_abort__bytes_in_nursery;
     /* the next fields are handled internally by the library */
     int associated_segment_num;
@@ -73,34 +76,22 @@
     void *creating_pthread[2];
 } stm_thread_local_t;
 
-#ifndef _STM_NURSERY_ZEROED
-#define _STM_NURSERY_ZEROED               0
-#endif
 
-#define _STM_GCFLAG_WRITE_BARRIER      0x01
-#define _STM_FAST_ALLOC           (66*1024)
-#define _STM_NSE_SIGNAL_ABORT             1
-#define _STM_NSE_SIGNAL_MAX               2
-
-#define _STM_CARD_MARKED 1      /* should always be 1... */
-#define _STM_GCFLAG_CARDS_SET          0x8
-#define _STM_CARD_BITS                 5   /* must be 5/6/7 for the pypy jit */
-#define _STM_CARD_SIZE                 (1 << _STM_CARD_BITS)
-#define _STM_MIN_CARD_COUNT            17
-#define _STM_MIN_CARD_OBJ_SIZE         (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
-
+/* this should use llvm's coldcc calling convention,
+   but it's not exposed to C code so far */
 void _stm_write_slowpath(object_t *);
 void _stm_write_slowpath_card(object_t *, uintptr_t);
 object_t *_stm_allocate_slowpath(ssize_t);
 object_t *_stm_allocate_external(ssize_t);
 void _stm_become_inevitable(const char*);
-void _stm_collectable_safe_point();
+void _stm_collectable_safe_point(void);
 
+/* for tests, but also used in duhton: */
 object_t *_stm_allocate_old(ssize_t size_rounded_up);
 char *_stm_real_address(object_t *o);
 #ifdef STM_TESTS
 #include <stdbool.h>
-uint8_t _stm_get_transaction_read_version();
+uint8_t _stm_get_transaction_read_version(void);
 uint8_t _stm_get_card_value(object_t *obj, long idx);
 bool _stm_was_read(object_t *obj);
 bool _stm_was_written(object_t *obj);
@@ -137,14 +128,32 @@
 long _stm_count_objects_pointing_to_nursery(void);
 object_t *_stm_enum_modified_old_objects(long index);
 object_t *_stm_enum_objects_pointing_to_nursery(long index);
-object_t *_stm_next_last_cl_entry();
-void _stm_start_enum_last_cl_entry();
-long _stm_count_cl_entries();
+object_t *_stm_next_last_cl_entry(void);
+void _stm_start_enum_last_cl_entry(void);
+long _stm_count_cl_entries(void);
 long _stm_count_old_objects_with_cards_set(void);
 object_t *_stm_enum_old_objects_with_cards_set(long index);
 uint64_t _stm_total_allocated(void);
 #endif
 
+
+#ifndef _STM_NURSERY_ZEROED
+#define _STM_NURSERY_ZEROED               0
+#endif
+
+#define _STM_GCFLAG_WRITE_BARRIER      0x01
+#define _STM_FAST_ALLOC           (66*1024)
+#define _STM_NSE_SIGNAL_ABORT             1
+#define _STM_NSE_SIGNAL_MAX               2
+
+#define _STM_CARD_MARKED 1      /* should always be 1... */
+#define _STM_GCFLAG_CARDS_SET          0x8
+#define _STM_CARD_BITS                 5   /* must be 5/6/7 for the pypy jit */
+#define _STM_CARD_SIZE                 (1 << _STM_CARD_BITS)
+#define _STM_MIN_CARD_COUNT            17
+#define _STM_MIN_CARD_OBJ_SIZE         (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
+
+
 /* ==================== HELPERS ==================== */
 #ifdef NDEBUG
 #define OPT_ASSERT(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
@@ -165,30 +174,32 @@
 */
 #define STM_NB_SEGMENTS    4
 
+/* Structure of objects
+   --------------------
 
+   Objects manipulated by the user program, and managed by this library,
+   must start with a "struct object_s" field.  Pointers to any user object
+   must use the "TLPREFIX struct foo *" type --- don't forget TLPREFIX.
+   The best is to use typedefs like above.
+
+   The object_s part contains some fields reserved for the STM library.
+   Right now this is only four bytes.
+*/
 struct object_s {
     uint32_t stm_flags;            /* reserved for the STM library */
 };
 
-extern ssize_t stmcb_size_rounded_up(struct object_s *);
-void stmcb_trace(struct object_s *obj, void visit(object_t **));
-/* a special trace-callback that is only called for the marked
-   ranges of indices (using stm_write_card(o, index)) */
-extern void stmcb_trace_cards(struct object_s *, void (object_t **),
-                              uintptr_t start, uintptr_t stop);
-/* this function will be called on objects that support cards.
-   It returns the base_offset (in bytes) inside the object from
-   where the indices start, and item_size (in bytes) for the size of
-   one item */
-extern void stmcb_get_card_base_itemsize(struct object_s *,
-                                         uintptr_t offset_itemsize[2]);
-/* returns whether this object supports cards. we will only call
-   stmcb_get_card_base_itemsize on objs that do so. */
-extern long stmcb_obj_supports_cards(struct object_s *);
 
-
-
-
+/* The read barrier must be called whenever the object 'obj' is read.
+   It is not required to call it before reading: it can be delayed for a
+   bit, but we must still be in the same "scope": no allocation, no
+   transaction commit, nothing that can potentially collect or do a safe
+   point (like stm_write() on a different object).  Also, if we might
+   have finished the transaction and started the next one, then
+   stm_read() needs to be called again.  It can be omitted if
+   stm_write() is called, or immediately after getting the object from
+   stm_allocate(), as long as the rules above are respected.
+*/
 __attribute__((always_inline))
 static inline void stm_read(object_t *obj)
 {
@@ -199,6 +210,11 @@
 #define _STM_WRITE_CHECK_SLOWPATH(obj)  \
     UNLIKELY(((obj)->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)
 
+/* The write barrier must be called *before* doing any change to the
+   object 'obj'.  If we might have finished the transaction and started
+   the next one, then stm_write() needs to be called again.  It is not
+   necessary to call it immediately after stm_allocate().
+*/
 __attribute__((always_inline))
 static inline void stm_write(object_t *obj)
 {
@@ -206,7 +222,14 @@
         _stm_write_slowpath(obj);
 }
 
-
+/* The following is a GC-optimized barrier that works on the granularity
+   of CARD_SIZE.  It can be used on any array object, but it is only
+   useful with those that were internally marked with GCFLAG_HAS_CARDS.
+   It has the same purpose as stm_write() for TM and allows write-access
+   to a part of an object/array.
+   'index' is the array-item-based position within the object, which
+   is measured in units returned by stmcb_get_card_base_itemsize().
+*/
 __attribute__((always_inline))
 static inline void stm_write_card(object_t *obj, uintptr_t index)
 {
@@ -245,7 +268,34 @@
     }
 }
 
+/* Must be provided by the user of this library.
+   The "size rounded up" must be a multiple of 8 and at least 16.
+   "Tracing" an object means enumerating all GC references in it,
+   by invoking the callback passed as argument.
+*/
+extern ssize_t stmcb_size_rounded_up(struct object_s *);
+void stmcb_trace(struct object_s *obj, void visit(object_t **));
+/* a special trace-callback that is only called for the marked
+   ranges of indices (using stm_write_card(o, index)) */
+extern void stmcb_trace_cards(struct object_s *, void (object_t **),
+                              uintptr_t start, uintptr_t stop);
+/* this function will be called on objects that support cards.
+   It returns the base_offset (in bytes) inside the object from
+   where the indices start, and item_size (in bytes) for the size of
+   one item */
+extern void stmcb_get_card_base_itemsize(struct object_s *,
+                                         uintptr_t offset_itemsize[2]);
+/* returns whether this object supports cards. we will only call
+   stmcb_get_card_base_itemsize on objs that do so. */
+extern long stmcb_obj_supports_cards(struct object_s *);
 
+
+
+
+/* Allocate an object of the given size, which must be a multiple
+   of 8 and at least 16.  In the fast-path, this is inlined to just
+   a few assembler instructions.
+*/
 __attribute__((always_inline))
 static inline object_t *stm_allocate(ssize_t size_rounded_up)
 {
@@ -267,21 +317,48 @@
     return (object_t *)p;
 }
 
-
+/* Allocate a weakref object. Weakref objects have a
+   reference to an object at the byte-offset
+       stmcb_size_rounded_up(obj) - sizeof(void*)
+   You must assign the reference before the next collection may happen.
+   After that, you must not mutate the reference anymore. However,
+   it can become NULL after any GC if the reference dies during that
+   collection.
+   NOTE: For performance, we assume stmcb_size_rounded_up(weakref)==16
+*/
 object_t *stm_allocate_weakref(ssize_t size_rounded_up);
 
 
+/* stm_setup() needs to be called once at the beginning of the program.
+   stm_teardown() can be called at the end, but that's not necessary
+   and rather meant for tests.
+ */
 void stm_setup(void);
 void stm_teardown(void);
 
+/* The size of each shadow stack, in number of entries.
+   Must be big enough to accommodate all STM_PUSH_ROOTs! */
 #define STM_SHADOW_STACK_DEPTH   163840
+
+/* Push and pop roots from/to the shadow stack. Only allowed inside
+   transaction. */
 #define STM_PUSH_ROOT(tl, p)   ((tl).shadowstack++->ss = (object_t *)(p))
 #define STM_POP_ROOT(tl, p)    ((p) = (typeof(p))((--(tl).shadowstack)->ss))
 #define STM_POP_ROOT_RET(tl)   ((--(tl).shadowstack)->ss)
 
+/* Every thread needs to have a corresponding stm_thread_local_t
+   structure.  It may be a "__thread" global variable or something else.
+   Use the following functions at the start and at the end of a thread.
+   The user of this library needs to maintain the two shadowstack fields;
+   at any call to stm_allocate(), these fields should point to a range
+   of memory that can be walked in order to find the stack roots.
+*/
 void stm_register_thread_local(stm_thread_local_t *tl);
 void stm_unregister_thread_local(stm_thread_local_t *tl);
 
+/* At some key places, like the entry point of the thread and in the
+   function with the interpreter's dispatch loop, you need to declare
+   a local variable of type 'rewind_jmp_buf' and call these macros. */
 #define stm_rewind_jmp_enterprepframe(tl, rjbuf)                        \
     rewind_jmp_enterprepframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack)
 #define stm_rewind_jmp_enterframe(tl, rjbuf)       \
@@ -303,37 +380,23 @@
     rewind_jmp_enum_shadowstack(&(tl)->rjthread, callback)
 
 
+/* Starting and ending transactions.  stm_read(), stm_write() and
+   stm_allocate() should only be called from within a transaction.
+   The stm_start_transaction() call returns the number of times it
+   returned, starting at 0.  If it is > 0, then the transaction was
+   aborted and restarted this number of times. */
 long stm_start_transaction(stm_thread_local_t *tl);
 void stm_start_inevitable_transaction(stm_thread_local_t *tl);
-
 void stm_commit_transaction(void);
 
 /* Temporary fix?  Call this outside a transaction.  If there is an
    inevitable transaction running somewhere else, wait until it finishes. */
 void stm_wait_for_current_inevitable_transaction(void);
 
+/* Abort the currently running transaction.  This function never
+   returns: it jumps back to the stm_start_transaction(). */
 void stm_abort_transaction(void) __attribute__((noreturn));
 
-void stm_collect(long level);
-
-long stm_identityhash(object_t *obj);
-long stm_id(object_t *obj);
-void stm_set_prebuilt_identityhash(object_t *obj, long hash);
-
-long stm_can_move(object_t *obj);
-
-object_t *stm_setup_prebuilt(object_t *);
-object_t *stm_setup_prebuilt_weakref(object_t *);
-
-long stm_call_on_abort(stm_thread_local_t *, void *key, void callback(void *));
-long stm_call_on_commit(stm_thread_local_t *, void *key, void callback(void *));
-
-static inline void stm_safe_point(void) {
-    if (STM_SEGMENT->nursery_end <= _STM_NSE_SIGNAL_MAX)
-        _stm_collectable_safe_point();
-}
-
-
 #ifdef STM_NO_AUTOMATIC_SETJMP
 int stm_is_inevitable(void);
 #else
@@ -341,6 +404,10 @@
     return !rewind_jmp_armed(&STM_SEGMENT->running_thread->rjthread);
 }
 #endif
+
+/* Turn the current transaction inevitable.
+   stm_become_inevitable() itself may still abort the transaction instead
+   of returning. */
 static inline void stm_become_inevitable(stm_thread_local_t *tl,
                                          const char* msg) {
     assert(STM_SEGMENT->running_thread == tl);
@@ -348,7 +415,64 @@
         _stm_become_inevitable(msg);
 }
 
+/* Forces a safe-point if needed.  Normally not needed: this is
+   automatic if you call stm_allocate(). */
+static inline void stm_safe_point(void) {
+    if (STM_SEGMENT->nursery_end <= _STM_NSE_SIGNAL_MAX)
+        _stm_collectable_safe_point();
+}
+
+/* Forces a collection. */
+void stm_collect(long level);
+
+
+/* Prepare an immortal "prebuilt" object managed by the GC.  Takes a
+   pointer to an 'object_t', which should not actually be a GC-managed
+   structure but a real static structure.  Returns the equivalent
+   GC-managed pointer.  Works by copying it into the GC pages, following
+   and fixing all pointers it contains, by doing stm_setup_prebuilt() on
+   each of them recursively.  (Note that this will leave garbage in the
+   static structure, but it should never be used anyway.) */
+object_t *stm_setup_prebuilt(object_t *);
+/* The same, if the prebuilt object is actually a weakref. */
+object_t *stm_setup_prebuilt_weakref(object_t *);
+
+/* Hash, id.  The id is just the address of the object (of the address
+   where it *will* be after the next minor collection).  The hash is the
+   same, mangled -- except on prebuilt objects, where it can be
+   controlled for each prebuilt object individually.  (Useful for PyPy) */
+long stm_identityhash(object_t *obj);
+long stm_id(object_t *obj);
+void stm_set_prebuilt_identityhash(object_t *obj, long hash);
+
+/* Returns 1 if the object can still move (it's in the nursery), or 0
+   otherwise.  After a minor collection no object can move any more. */
+long stm_can_move(object_t *obj);
+
+/* If the current transaction aborts later, invoke 'callback(key)'.  If
+   the current transaction commits, then the callback is forgotten.  You
+   can only register one callback per key.  You can call
+   'stm_call_on_abort(key, NULL)' to cancel an existing callback
+   (returns 0 if there was no existing callback to cancel).
+   Note: 'key' must be aligned to a multiple of 8 bytes. */
+long stm_call_on_abort(stm_thread_local_t *, void *key, void callback(void *));
+/* If the current transaction commits later, invoke 'callback(key)'.  If
+   the current transaction aborts, then the callback is forgotten.  Same
+   restrictions as stm_call_on_abort().  If the transaction is or becomes
+   inevitable, 'callback(key)' is called immediately. */
+long stm_call_on_commit(stm_thread_local_t *, void *key, void callback(void *));
+
+
+/* Similar to stm_become_inevitable(), but additionally suspend all
+   other threads.  A very heavy-handed way to make sure that no other
+   transaction is running concurrently.  Avoid as much as possible.
+   Other transactions will continue running only after this transaction
+   commits.  (xxx deprecated and may be removed) */
 void stm_become_globally_unique_transaction(stm_thread_local_t *tl, const char *msg);
+
+/* Moves the transaction forward in time by validating the read and
+   write set with all commits that happened since the last validation
+   (explicit or implicit). */
 void stm_validate(void);
 
 /* Temporarily stop all the other threads, by waiting until they
@@ -407,8 +531,8 @@
 /* The markers pushed in the shadowstack are an odd number followed by a
    regular object pointer. */
 typedef struct {
-    uintptr_t odd_number;
-    object_t *object;
+    uintptr_t odd_number;  /* marker odd number, or 0 if marker is missing */
+    object_t *object;      /* marker object, or NULL if marker is missing */
 } stm_loc_marker_t;
 extern void (*stmcb_timing_event)(stm_thread_local_t *tl, /* the local thread */
                                   enum stm_event_e event,
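
As an aside, the comments added to stmgc.h above document the public barrier and allocation API. A minimal usage sketch based only on those comments; the 'node_s' type and 'node_bump' helper are hypothetical, assuming a layout that rounds up to 16 bytes:

    #include "stmgc.h"

    /* hypothetical user object: must start with 'struct object_s' and be
       handled through a TLPREFIX pointer, per the header comments */
    struct node_s {
        struct object_s hdr;       /* reserved for the STM library */
        long value;
    };
    typedef TLPREFIX struct node_s node_t;

    /* to be called from inside a transaction */
    static long node_bump(stm_thread_local_t *tl, node_t *node)
    {
        /* keep 'node' findable across a possible collection in stm_allocate() */
        STM_PUSH_ROOT(*tl, node);
        node_t *fresh = (node_t *)stm_allocate(16);   /* multiple of 8, >= 16 */
        STM_POP_ROOT(*tl, node);

        /* a fresh object must be initialized before use; no write barrier
           is needed right after stm_allocate() */
        fresh->value = 42;

        stm_write((object_t *)node);   /* write barrier before any change */
        node->value += 1;

        stm_read((object_t *)node);    /* read barrier; can be slightly delayed */
        return node->value + fresh->value;
    }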

