[pypy-commit] pypy stmgc-c7: import stmgc/194265597fad [hashtable]

arigo noreply at buildbot.pypy.org
Mon Nov 10 16:15:16 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7
Changeset: r74428:f3d738ba20db
Date: 2014-11-10 16:14 +0100
http://bitbucket.org/pypy/pypy/changeset/f3d738ba20db/

Log:	import stmgc/194265597fad [hashtable]

diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-b27bcdd6bc87
+194265597fad
diff --git a/rpython/translator/stm/src_stm/stm/hashtable.c b/rpython/translator/stm/src_stm/stm/hashtable.c
new file mode 100644
--- /dev/null
+++ b/rpython/translator/stm/src_stm/stm/hashtable.c
@@ -0,0 +1,380 @@
+/* Imported by rpython/translator/stm/import_stmgc.py */
+/*
+Design of stmgc's "hashtable" objects
+=====================================
+
+A "hashtable" is theoretically a lazily-filled array of objects of
+length 2**64.  Initially it is full of NULLs.  It's obviously
+implemented as a dictionary in which NULL objects are not needed.
+
+The only operations on a hashtable are reading or writing an object at
+a given index.
+
+There are two markers for every index (a read and a write marker).
+This is unlike regular arrays, which have only two markers in total.
+
+
+Implementation
+--------------
+
+First idea: have the hashtable in raw memory, pointing to "entry"
+objects.  The entry objects themselves point to the user-specified
+objects, and they have the read/write markers.  Every entry object
+itself, once created, stays around.  It is only removed by the next
+major GC if it points to NULL and its read/write markers are not set
+in any currently-running transaction.
+*/
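+
+/* Illustrative sketch, not part of the imported file: how a caller
+   might drive the public API declared in stmgc.h, assuming a running
+   transaction, an owning object 'hobj', a thread-local 'tl' and an
+   object 'some_object' (all hypothetical names):
+
+       stm_hashtable_t *h = stm_hashtable_create();
+       stm_hashtable_write(hobj, h, 42, some_object, tl);
+       object_t *found = stm_hashtable_read(hobj, h, 42);  // == some_object
+       object_t *miss  = stm_hashtable_read(hobj, h, 43);  // NULL: never written
+       stm_hashtable_free(h);   // typically from a light finalizer of 'hobj'
+*/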
+
+
+uint32_t stm_hashtable_entry_userdata;
+
+
+#define INITIAL_HASHTABLE_SIZE   8
+#define PERTURB_SHIFT            5
+#define RESIZING_LOCK            0
+
+typedef struct {
+    uintptr_t mask;
+
+    /* 'resize_counter' starts at an odd value, and is decremented (by
+       6) for every new item put in 'items'; a worked example follows
+       the struct definition.  When it would cross 0, we instead
+       allocate a bigger table and change 'resize_counter' to be a
+       regular pointer to it (which is then even).  The whole
+       structure is then immutable.
+
+       The field 'resize_counter' also works as a write lock: changes
+       go via the intermediate value RESIZING_LOCK (0).
+    */
+    uintptr_t resize_counter;
+
+    stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
+} stm_hashtable_table_t;
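+
+/* Worked example, not part of the imported file: with
+   INITIAL_HASHTABLE_SIZE = 8, init_table() below sets mask = 7 and
+   resize_counter = 8*4 + 1 = 33.  Each insertion subtracts 6:
+   33 -> 27 -> 21 -> 15 -> 9 -> 3.  The sixth insertion then finds
+   resize_counter <= 6 and triggers a rehash instead, so an 8-slot
+   table holds at most 5 entries (62.5% full) before growing. */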
+
+#define IS_EVEN(p) (((p) & 1) == 0)
+
+struct stm_hashtable_s {
+    stm_hashtable_table_t *table;
+    stm_hashtable_table_t initial_table;
+    uint64_t additions;
+};
+
+
+static inline void init_table(stm_hashtable_table_t *table, uintptr_t itemcount)
+{
+    table->mask = itemcount - 1;
+    table->resize_counter = itemcount * 4 + 1;
+    memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *));
+}
+
+stm_hashtable_t *stm_hashtable_create(void)
+{
+    stm_hashtable_t *hashtable = malloc(sizeof(stm_hashtable_t));
+    assert(hashtable);
+    hashtable->table = &hashtable->initial_table;
+    hashtable->additions = 0;
+    init_table(&hashtable->initial_table, INITIAL_HASHTABLE_SIZE);
+    return hashtable;
+}
+
+void stm_hashtable_free(stm_hashtable_t *hashtable)
+{
+    uintptr_t rc = hashtable->initial_table.resize_counter;
+    free(hashtable);
+    while (IS_EVEN(rc)) {
+        assert(rc != RESIZING_LOCK);
+
+        stm_hashtable_table_t *table = (stm_hashtable_table_t *)rc;
+        rc = table->resize_counter;
+        free(table);
+    }
+}
+
+static bool _stm_was_read_by_anybody(object_t *obj)
+{
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        char *remote_base = get_segment_base(i);
+        uint8_t remote_version = get_segment(i)->transaction_read_version;
+        if (was_read_remote(remote_base, obj, remote_version))
+            return true;
+    }
+    return false;
+}
+
+#define VOLATILE_HASHTABLE(p)    ((volatile stm_hashtable_t *)(p))
+#define VOLATILE_TABLE(p)  ((volatile stm_hashtable_table_t *)(p))
+
+static void _insert_clean(stm_hashtable_table_t *table,
+                          stm_hashtable_entry_t *entry)
+{
+    uintptr_t mask = table->mask;
+    uintptr_t i = entry->index & mask;
+    if (table->items[i] == NULL) {
+        table->items[i] = entry;
+        return;
+    }
+
+    uintptr_t perturb = entry->index;
+    while (1) {
+        i = (i << 2) + i + perturb + 1;
+        i &= mask;
+        if (table->items[i] == NULL) {
+            table->items[i] = entry;
+            return;
+        }
+
+        perturb >>= PERTURB_SHIFT;
+    }
+}
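+
+/* Worked example, not part of the imported file: with mask = 7 and
+   index = 42, the first slot tried is 42 & 7 = 2.  On collisions the
+   loop above computes i = (5*i + perturb + 1) & mask, with perturb
+   starting at 42 and shifted right by PERTURB_SHIFT each round; the
+   probed slots are 2, 5, 3, 0, 1, 6, 7, 4.  Once perturb reaches 0
+   the recurrence i = 5*i + 1 (mod 2**k) visits every slot, so
+   _insert_clean() always terminates when a free slot exists. */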
+
+static void _stm_rehash_hashtable(stm_hashtable_t *hashtable,
+                                  uintptr_t biggercount,
+                                  bool remove_unread)
+{
+    dprintf(("rehash %p to %ld, remove_unread=%d\n",
+             hashtable, biggercount, (int)remove_unread));
+
+    size_t size = (offsetof(stm_hashtable_table_t, items)
+                   + biggercount * sizeof(stm_hashtable_entry_t *));
+    stm_hashtable_table_t *biggertable = malloc(size);
+    assert(biggertable);   // XXX
+
+    stm_hashtable_table_t *table = hashtable->table;
+    table->resize_counter = (uintptr_t)biggertable;
+    /* ^^^ this unlocks the table by writing a non-zero value to
+       table->resize_counter, but the new value is a pointer to the
+       new bigger table, so IS_EVEN() is still true */
+
+    init_table(biggertable, biggercount);
+
+    uintptr_t j, mask = table->mask;
+    uintptr_t rc = biggertable->resize_counter;
+    for (j = 0; j <= mask; j++) {
+        stm_hashtable_entry_t *entry = table->items[j];
+        if (entry == NULL)
+            continue;
+        if (remove_unread) {
+            if (entry->object == NULL &&
+                   !_stm_was_read_by_anybody((object_t *)entry))
+                continue;
+        }
+        _insert_clean(biggertable, entry);
+        rc -= 6;
+    }
+    biggertable->resize_counter = rc;
+
+    write_fence();   /* make sure that 'biggertable' is valid here,
+                        and make sure 'table->resize_counter' is updated
+                        ('table' must be immutable from now on). */
+    VOLATILE_HASHTABLE(hashtable)->table = biggertable;
+}
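+
+/* Illustrative note, not part of the imported file: after a resize,
+   'hashtable->table' points to 'biggertable', while the old table
+   stays around, immutable, with its now-even 'resize_counter'
+   pointing to 'biggertable'.  A lookup racing with the resize can
+   still search the old table for pre-existing entries; if it misses,
+   it observes the even 'resize_counter', spins, and restarts on the
+   new table.  The chain of outdated tables is freed only by
+   stm_hashtable_free() or by _stm_compact_hashtable() at major GC. */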
+
+stm_hashtable_entry_t *stm_hashtable_lookup(object_t *hashtableobj,
+                                            stm_hashtable_t *hashtable,
+                                            uintptr_t index)
+{
+    stm_hashtable_table_t *table;
+    uintptr_t mask;
+    uintptr_t i;
+    stm_hashtable_entry_t *entry;
+
+ restart:
+    /* classical dict lookup logic */
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    mask = table->mask;      /* read-only field */
+    i = index & mask;
+    entry = VOLATILE_TABLE(table)->items[i];
+    if (entry != NULL) {
+        if (entry->index == index)
+            return entry;           /* found at the first try */
+
+        uintptr_t perturb = index;
+        while (1) {
+            i = (i << 2) + i + perturb + 1;
+            i &= mask;
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                if (entry->index == index)
+                    return entry;    /* found */
+            }
+            else
+                break;
+            perturb >>= PERTURB_SHIFT;
+        }
+    }
+    /* here, we didn't find the 'entry' with the correct index. */
+
+    uintptr_t rc = VOLATILE_TABLE(table)->resize_counter;
+
+    /* if rc is RESIZING_LOCK (which is 0, so even), a concurrent thread
+       is writing to the hashtable.  Or, if rc is another even number, it
+       is actually a pointer to the next version of the table, installed
+       just now.  In both cases, this thread simply spins and restarts
+       the lookup.
+    */
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+    /* otherwise rc is odd: we try to grab the RESIZING_LOCK with a
+       compare-and-swap.
+     */
+    if (!__sync_bool_compare_and_swap(&table->resize_counter,
+                                      rc, RESIZING_LOCK)) {
+        goto restart;
+    }
+    /* we now have the lock.  The only table with a non-even value of
+       'resize_counter' should be the last one in the chain, so if we
+       succeeded in locking it, check this. */
+    assert(table == hashtable->table);
+
+    /* Check that 'table->items[i]' is still NULL,
+       i.e. hasn't been populated under our feet.
+    */
+    if (table->items[i] != NULL) {
+        table->resize_counter = rc;    /* unlock */
+        goto restart;
+    }
+    /* if rc is greater than 6, there is enough room for a new
+       item in the current table.
+    */
+    if (rc > 6) {
+        /* we can only enter here once!  If we allocate stuff, we may
+           run the GC, and so 'hashtableobj' might move afterwards. */
+        if (_is_in_nursery(hashtableobj)) {
+            entry = (stm_hashtable_entry_t *)
+                stm_allocate(sizeof(stm_hashtable_entry_t));
+            entry->userdata = stm_hashtable_entry_userdata;
+            entry->index = index;
+            entry->object = NULL;
+        }
+        else {
+            /* for a non-nursery 'hashtableobj', we pretend that the
+               'entry' object we're about to return was already
+               existing all along, with NULL in all segments.  If the
+               caller of this function is going to modify the 'object'
+               field, it will call stm_write(entry) first, which will
+               correctly schedule 'entry' for write propagation.  We
+               do that even if 'hashtableobj' was created by the
+               running transaction: the new 'entry' object is created
+               as if it was older than the transaction.
+
+               Note the following difference: if 'hashtableobj' is
+               still in the nursery (case above), the 'entry' object
+               is also allocated from the nursery, and after a minor
+               collection it ages as an old-but-created-by-the-
+               current-transaction object.  We could try to emulate
+               this here, or to create young 'entry' objects, but
+               doing either of these would require careful
+               synchronization with other pieces of the code that may
+               change.
+            */
+            acquire_privatization_lock();
+            char *p = allocate_outside_nursery_large(
+                          sizeof(stm_hashtable_entry_t));
+            entry = (stm_hashtable_entry_t *)(p - stm_object_pages);
+
+            long j;
+            for (j = 0; j <= NB_SEGMENTS; j++) {
+                struct stm_hashtable_entry_s *e;
+                e = (struct stm_hashtable_entry_s *)
+                        REAL_ADDRESS(get_segment_base(j), entry);
+                e->header.stm_flags = GCFLAG_WRITE_BARRIER;
+                e->userdata = stm_hashtable_entry_userdata;
+                e->index = index;
+                e->object = NULL;
+            }
+            release_privatization_lock();
+        }
+        write_fence();     /* make sure 'entry' is fully initialized here */
+        table->items[i] = entry;
+        hashtable->additions += 1;
+        write_fence();     /* make sure 'table->items' is written here */
+        VOLATILE_TABLE(table)->resize_counter = rc - 6;    /* unlock */
+        return entry;
+    }
+    else {
+        /* if rc is smaller than 6, we must allocate a new bigger table.
+         */
+        uintptr_t biggercount = table->mask + 1;
+        if (biggercount < 50000)
+            biggercount *= 4;
+        else
+            biggercount *= 2;
+        _stm_rehash_hashtable(hashtable, biggercount, /*remove_unread=*/false);
+        goto restart;
+    }
+}
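+
+/* Illustrative summary, not part of the imported file, of the states
+   in which the code above can observe 'resize_counter':
+
+       odd and > 6         unlocked; room for at least one more entry
+       odd and <= 6        unlocked; the next insertion rehashes first
+       0 (RESIZING_LOCK)   a writer holds the lock; other threads spin
+       other even value    pointer to the next, bigger table
+*/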
+
+object_t *stm_hashtable_read(object_t *hobj, stm_hashtable_t *hashtable,
+                             uintptr_t key)
+{
+    stm_hashtable_entry_t *e = stm_hashtable_lookup(hobj, hashtable, key);
+    stm_read((object_t *)e);
+    return e->object;
+}
+
+void stm_hashtable_write(object_t *hobj, stm_hashtable_t *hashtable,
+                         uintptr_t key, object_t *nvalue,
+                         stm_thread_local_t *tl)
+{
+    STM_PUSH_ROOT(*tl, nvalue);
+    stm_hashtable_entry_t *e = stm_hashtable_lookup(hobj, hashtable, key);
+    stm_write((object_t *)e);
+    STM_POP_ROOT(*tl, nvalue);
+    e->object = nvalue;
+}
+
+static void _stm_compact_hashtable(stm_hashtable_t *hashtable)
+{
+    stm_hashtable_table_t *table = hashtable->table;
+    assert(!IS_EVEN(table->resize_counter));
+
+    if (hashtable->additions * 4 > table->mask) {
+        hashtable->additions = 0;
+        uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+        uintptr_t num_entries_times_6 = initial_rc - table->resize_counter;
+        uintptr_t count = INITIAL_HASHTABLE_SIZE;
+        while (count * 4 < num_entries_times_6)
+            count *= 2;
+        /* sanity-check: 'num_entries_times_6 < initial_rc', and so 'count'
+           can never grow larger than the current table size. */
+        assert(count <= table->mask + 1);
+
+        _stm_rehash_hashtable(hashtable, count, /*remove_unread=*/true);
+    }
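+
+    /* Worked example, not part of the imported file: for a table of
+       64 slots, initial_rc = 64*4 + 1 = 257.  If 10 entries had been
+       inserted into it, resize_counter is 257 - 60 = 197 and
+       num_entries_times_6 = 60; 'count' then doubles from 8 to 16
+       (8*4 = 32 < 60, but 16*4 = 64 >= 60), so the table shrinks from
+       64 to 16 slots. */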
+
+    table = hashtable->table;
+    assert(!IS_EVEN(table->resize_counter));
+
+    if (table != &hashtable->initial_table) {
+        uintptr_t rc = hashtable->initial_table.resize_counter;
+        while (1) {
+            assert(IS_EVEN(rc));
+            assert(rc != RESIZING_LOCK);
+
+            stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc;
+            if (old_table == table)
+                break;
+            rc = old_table->resize_counter;
+            free(old_table);
+        }
+        hashtable->initial_table.resize_counter = (uintptr_t)table;
+    }
+}
+
+void stm_hashtable_tracefn(stm_hashtable_t *hashtable, void trace(object_t **))
+{
+    if (trace == TRACE_FOR_MAJOR_COLLECTION)
+        _stm_compact_hashtable(hashtable);
+
+    stm_hashtable_table_t *table;
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+
+    uintptr_t j, mask = table->mask;
+    for (j = 0; j <= mask; j++) {
+        stm_hashtable_entry_t *volatile *pentry;
+        pentry = &VOLATILE_TABLE(table)->items[j];
+        if (*pentry != NULL) {
+            trace((object_t **)pentry);
+        }
+    }
+}
diff --git a/rpython/translator/stm/src_stm/stmgc.c b/rpython/translator/stm/src_stm/stmgc.c
--- a/rpython/translator/stm/src_stm/stmgc.c
+++ b/rpython/translator/stm/src_stm/stmgc.c
@@ -17,7 +17,6 @@
 #include "stm/weakref.h"
 #include "stm/marker.h"
 #include "stm/finalizer.h"
-#include "stm/hashtable.h"
 
 #include "stm/misc.c"
 #include "stm/list.c"
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -535,8 +535,12 @@
    If you want to embed the hashtable inside an 'object_t' you
    probably need a light finalizer to do the freeing. */
 typedef struct stm_hashtable_s stm_hashtable_t;
+typedef TLPREFIX struct stm_hashtable_entry_s stm_hashtable_entry_t;
+
 stm_hashtable_t *stm_hashtable_create(void);
 void stm_hashtable_free(stm_hashtable_t *);
+stm_hashtable_entry_t *stm_hashtable_lookup(object_t *, stm_hashtable_t *,
+                                            uintptr_t key);
 object_t *stm_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
 void stm_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
                          object_t *nvalue, stm_thread_local_t *);

