[pypy-commit] stmgc default: Merge with c8-hashtable

Raemi noreply at buildbot.pypy.org
Thu Apr 9 16:03:35 CEST 2015


Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: 
Changeset: r1753:a92ba2549987
Date: 2015-04-09 15:18 +0200
http://bitbucket.org/pypy/stmgc/changeset/a92ba2549987/

Log:	Merge with c8-hashtable

diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -280,8 +280,14 @@
         struct stm_undo_s *end = undo + cl->written_count;
         for (; undo < end; undo++) {
             if (undo->type == TYPE_POSITION_MARKER) {
-                fprintf(stderr, "    marker %p %lu\n",
-                        undo->marker_object, undo->marker_odd_number);
+                if (undo->type2 == TYPE_MODIFIED_HASHTABLE) {
+                    fprintf(stderr, "    hashtable %p\n",
+                            undo->modif_hashtable);
+                }
+                else {
+                    fprintf(stderr, "    marker %p %lu\n",
+                            undo->marker_object, undo->marker_odd_number);
+                }
                 continue;
             }
             fprintf(stderr, "    obj %p, size %d, ofs %lu: ", undo->object,
@@ -383,21 +389,40 @@
                     struct stm_undo_s *undo = cl->written;
                     struct stm_undo_s *end = cl->written + cl->written_count;
                     for (; undo < end; undo++) {
-                        if (undo->type == TYPE_POSITION_MARKER)
+                        object_t *obj;
+
+                        if (undo->type != TYPE_POSITION_MARKER) {
+                            /* common case: 'undo->object' was written to
+                               in this past commit, so we must check that
+                               it was not read by us. */
+                            obj = undo->object;
+                        }
+                        else if (undo->type2 != TYPE_MODIFIED_HASHTABLE)
                             continue;
-                        if (_stm_was_read(undo->object)) {
-                            /* first reset all modified objects from the backup
-                               copies as soon as the first conflict is detected;
-                               then we will proceed below to update our segment from
-                               the old (but unmodified) version to the newer version.
-                            */
-                            reset_modified_from_backup_copies(my_segnum);
-                            timing_write_read_contention(cl->written, undo);
-                            needs_abort = true;
+                        else {
+                            /* the previous stm_undo_s is about a written
+                               'entry' object, which belongs to the hashtable
+                               given now.  Check that we haven't read the
+                               hashtable (via stm_hashtable_list()). */
+                            obj = undo->modif_hashtable;
+                        }
 
-                            dprintf(("_stm_validate() failed for obj %p\n", undo->object));
-                            break;
-                        }
+                        if (LIKELY(!_stm_was_read(obj)))
+                            continue;
+
+                        /* conflict! */
+                        dprintf(("_stm_validate() failed for obj %p\n", obj));
+
+                        /* first reset all modified objects from the backup
+                           copies as soon as the first conflict is detected;
+                           then we will proceed below to update our segment
+                           from the old (but unmodified) version to the newer
+                           version.
+                        */
+                        reset_modified_from_backup_copies(my_segnum);
+                        timing_write_read_contention(cl->written, undo);
+                        needs_abort = true;
+                        break;
                     }
                 }
 
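Reader's note (not part of the patch): after stm_hashtable_write_entry() records a write to an old 'entry' object, the commit log ends with two records: the regular undo slot for the entry itself, followed by a marker triple naming the owning hashtable. A hypothetical pair of records, using the union fields declared in core.h below ('entry' and 'hobj' are placeholders), would look like:

    /* sketch only: the two stm_undo_s records produced by one
       hashtable write */
    struct stm_undo_s rec_entry = {
        .object = (object_t *)entry,       /* checked with _stm_was_read() */
    };
    struct stm_undo_s rec_marker = {
        .type1 = TYPE_POSITION_MARKER,     /* -1 */
        .type2 = TYPE_MODIFIED_HASHTABLE,  /* -2 */
        .modif_hashtable = hobj,           /* read via stm_hashtable_list()? */
    };
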
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -190,9 +190,15 @@
         uintptr_t marker_odd_number; /* the odd number part of the marker */
         object_t *marker_object;     /* the object part of the marker */
     };
+    struct {
+        intptr_t type1;             /* TYPE_POSITION_MARKER (again) */
+        intptr_t type2;             /* TYPE_MODIFIED_HASHTABLE */
+        object_t *modif_hashtable;  /* modified entry is previous stm_undo_s */
+    };
   };
 };
 #define TYPE_POSITION_MARKER    (-1)
+#define TYPE_MODIFIED_HASHTABLE (-2)
 #define SLICE_OFFSET(slice)  ((slice) >> 16)
 #define SLICE_SIZE(slice)    ((int)((slice) & 0xFFFF))
 #define NEW_SLICE(offset, size) (((uint64_t)(offset)) << 16 | (size))
@@ -251,6 +257,14 @@
     return stm_object_pages + segment_num * (NB_PAGES * 4096UL);
 }
 
+static inline long get_num_segment_containing_address(char *addr)
+{
+    uintptr_t delta = addr - stm_object_pages;
+    uintptr_t result = delta / (NB_PAGES * 4096UL);
+    assert(result < NB_SEGMENTS);
+    return result;
+}
+
 static inline
 struct stm_segment_info_s *get_segment(long segment_num) {
     return (struct stm_segment_info_s *)REAL_ADDRESS(
@@ -285,6 +299,17 @@
 static void _signal_handler(int sig, siginfo_t *siginfo, void *context);
 static bool _stm_validate();
 
+static inline bool was_read_remote(char *base, object_t *obj)
+{
+    uint8_t other_transaction_read_version =
+        ((struct stm_segment_info_s *)REAL_ADDRESS(base, STM_PSEGMENT))
+            ->transaction_read_version;
+    uint8_t rm = ((struct stm_read_marker_s *)
+                  (base + (((uintptr_t)obj) >> 4)))->rm;
+    assert(rm <= other_transaction_read_version);
+    return rm == other_transaction_read_version;
+}
+
 static inline void _duck(void) {
     /* put a call to _duck() between two instructions that set 0 into
        a %gs-prefixed address and that may otherwise be replaced with
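Reader's note (not part of the patch): was_read_remote() above relies on the read-marker layout, where each aligned 16-byte granule of a segment maps to one marker byte. A minimal restatement of the arithmetic:

    /* sketch: the marker byte for 'obj' in the segment at 'base';
       'obj' is a segment-relative address, so >> 4 picks the byte */
    uint8_t rm = ((struct stm_read_marker_s *)
                  (base + (((uintptr_t)obj) >> 4)))->rm;
    /* 'obj' counts as read iff rm equals that segment's current
       transaction_read_version */
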
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -130,6 +130,58 @@
     return o;
 }
 
+static void _fill_preexisting_slice(long segnum, char *dest,
+                                    const char *src, uintptr_t size)
+{
+    uintptr_t np = dest - get_segment_base(segnum);
+    if (get_page_status_in(segnum, np / 4096) != PAGE_NO_ACCESS)
+        memcpy(dest, src, size);
+}
+
+object_t *stm_allocate_preexisting(ssize_t size_rounded_up,
+                                   const char *initial_data)
+{
+    stm_char *np = allocate_outside_nursery_large(size_rounded_up);
+    uintptr_t nobj = (uintptr_t)np;
+    dprintf(("allocate_preexisting: %p\n", (object_t *)nobj));
+
+    char *nobj_seg0 = stm_object_pages + nobj;
+    memcpy(nobj_seg0, initial_data, size_rounded_up);
+    ((struct object_s *)nobj_seg0)->stm_flags = GCFLAG_WRITE_BARRIER;
+
+    acquire_privatization_lock(STM_SEGMENT->segment_num);
+    DEBUG_EXPECT_SEGFAULT(false);
+
+    long j;
+    for (j = 1; j < NB_SEGMENTS; j++) {
+        const char *src = nobj_seg0;
+        char *dest = get_segment_base(j) + nobj;
+        char *end = dest + size_rounded_up;
+
+        while (((uintptr_t)dest) / 4096 != ((uintptr_t)end - 1) / 4096) {
+            uintptr_t count = 4096 - (((uintptr_t)dest) & 4095);
+            _fill_preexisting_slice(j, dest, src, count);
+            src += count;
+            dest += count;
+        }
+        _fill_preexisting_slice(j, dest, src, end - dest);
+
+#ifdef STM_TESTS
+        /* can't really enable this check outside tests, because there is
+           a chance that the transaction_state changes in parallel */
+        if (get_priv_segment(j)->transaction_state != TS_NONE) {
+            assert(!was_read_remote(get_segment_base(j), (object_t *)nobj));
+        }
+#endif
+    }
+
+    DEBUG_EXPECT_SEGFAULT(true);
+    release_privatization_lock(STM_SEGMENT->segment_num);
+
+    write_fence();     /* make sure 'nobj' is seen as fully initialized
+                          by all threads from here on */
+    return (object_t *)nobj;
+}
 
 /************************************************************/
 
@@ -249,6 +301,8 @@
 }
 
 
+#define TRACE_FOR_MAJOR_COLLECTION  (&mark_record_trace)
+
 static void mark_and_trace(
     object_t *obj,
     char *segment_base, /* to trace obj in */
@@ -408,7 +462,8 @@
         struct stm_undo_s *modified = (struct stm_undo_s *)lst->items;
         struct stm_undo_s *end = (struct stm_undo_s *)(lst->items + lst->count);
         for (; modified < end; modified++) {
-            if (modified->type == TYPE_POSITION_MARKER)
+            if (modified->type == TYPE_POSITION_MARKER &&
+                    modified->type2 != TYPE_MODIFIED_HASHTABLE)
                 mark_visit_possibly_new_object(modified->marker_object, pseg);
         }
     }
@@ -541,6 +596,31 @@
                 list_set_item(lst, n, list_pop_item(lst));
             }
         }
+
+        /* Remove from 'modified_old_objects' all old hashtables that die */
+        {
+            lst = pseg->modified_old_objects;
+            uintptr_t j, k = 0, limit = list_count(lst);
+            for (j = 0; j < limit; j += 3) {
+                uintptr_t e0 = list_item(lst, j + 0);
+                uintptr_t e1 = list_item(lst, j + 1);
+                uintptr_t e2 = list_item(lst, j + 2);
+                if (e0 == TYPE_POSITION_MARKER &&
+                    e1 == TYPE_MODIFIED_HASHTABLE &&
+                    !mark_visited_test((object_t *)e2)) {
+                    /* hashtable object dies */
+                }
+                else {
+                    if (j != k) {
+                        list_set_item(lst, k + 0, e0);
+                        list_set_item(lst, k + 1, e1);
+                        list_set_item(lst, k + 2, e2);
+                    }
+                    k += 3;
+                }
+            }
+            lst->count = k;
+        }
     }
 #pragma pop_macro("STM_SEGMENT")
 #pragma pop_macro("STM_PSEGMENT")
diff --git a/c8/stm/hashtable.c b/c8/stm/hashtable.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/hashtable.c
@@ -0,0 +1,532 @@
+/*
+Design of stmgc's "hashtable" objects
+=====================================
+
+A "hashtable" is theoretically a lazily-filled array of objects of
+length 2**64.  Initially it is full of NULLs.  It's obviously
+implemented as a dictionary in which NULL objects are not needed.
+
+A real dictionary can be implemented on top of it, by using the index
+`hash(key)` in the hashtable, and storing a list of `(key, value)`
+pairs at that index (usually only one, unless there is a hash
+collision).
+
+The main operations on a hashtable are reading or writing an object at a
+given index.  It also supports fetching the list of non-NULL entries.
+
+There are two markers for every index (a read and a write marker).
+This is unlike regular arrays, which have only two markers in total.
+
+Additionally, we use the read marker for the hashtable object itself
+to mean "we have read the complete list of keys".  This plays the role
+of a "global" read marker: when any thread adds a new key/value object
+to the hashtable, this new object's read marker is initialized with a
+copy of the "global" read marker --- in all segments.
+
+
+Implementation
+--------------
+
+First idea: have the hashtable in raw memory, pointing to "entry"
+objects (which are regular, GC- and STM-managed objects).  The entry
+objects themselves point to the user-specified objects.  The entry
+objects hold the read/write markers.  Every entry object, once
+created, stays around.  It is only removed by the next major GC if it
+points to NULL and its read/write markers are not set in any
+currently-running transaction.
+
+References
+----------
+
+Inspired by: http://ppl.stanford.edu/papers/podc011-bronson.pdf
+*/
+
+
+uint32_t stm_hashtable_entry_userdata;
+
+
+#define INITIAL_HASHTABLE_SIZE   8
+#define PERTURB_SHIFT            5
+#define RESIZING_LOCK            0
+
+typedef struct {
+    uintptr_t mask;
+
+    /* 'resize_counter' starts at an odd value, and is decremented (by
+       6) for every new item put in 'items'.  When it drops to 6 or
+       below, we instead allocate a bigger table and change
+       'resize_counter' to be a regular pointer to it (which is then
+       even).  The whole structure is immutable then.
+
+       The field 'resize_counter' also works as a write lock: changes
+       go via the intermediate value RESIZING_LOCK (0).
+    */
+    uintptr_t resize_counter;
+
+    stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
+} stm_hashtable_table_t;
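+
+/* Worked example (informal): with INITIAL_HASHTABLE_SIZE = 8, the
+   initial resize_counter is 8 * 4 + 1 = 33.  Successive insertions
+   take it 33 -> 27 -> 21 -> 15 -> 9 -> 3; the first insertion that
+   finds the counter at 6 or below resizes instead, so an 8-slot
+   table holds at most 5 entries before growing. */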
+
+#define IS_EVEN(p) (((p) & 1) == 0)
+
+struct stm_hashtable_s {
+    stm_hashtable_table_t *table;
+    stm_hashtable_table_t initial_table;
+    uint64_t additions;
+};
+
+
+static inline void init_table(stm_hashtable_table_t *table, uintptr_t itemcount)
+{
+    table->mask = itemcount - 1;
+    table->resize_counter = itemcount * 4 + 1;
+    memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *));
+}
+
+stm_hashtable_t *stm_hashtable_create(void)
+{
+    stm_hashtable_t *hashtable = malloc(sizeof(stm_hashtable_t));
+    assert(hashtable);
+    hashtable->table = &hashtable->initial_table;
+    hashtable->additions = 0;
+    init_table(&hashtable->initial_table, INITIAL_HASHTABLE_SIZE);
+    return hashtable;
+}
+
+void stm_hashtable_free(stm_hashtable_t *hashtable)
+{
+    uintptr_t rc = hashtable->initial_table.resize_counter;
+    free(hashtable);
+    while (IS_EVEN(rc)) {
+        assert(rc != RESIZING_LOCK);
+
+        stm_hashtable_table_t *table = (stm_hashtable_table_t *)rc;
+        rc = table->resize_counter;
+        free(table);
+    }
+}
+
+static bool _stm_was_read_by_anybody(object_t *obj)
+{
+    /* can only be safely called during major GC, when all other threads
+       are suspended */
+    long i;
+    for (i = 1; i < NB_SEGMENTS; i++) {
+        if (get_priv_segment(i)->transaction_state == TS_NONE)
+            continue;
+        if (was_read_remote(get_segment_base(i), obj))
+            return true;
+    }
+    return false;
+}
+
+#define VOLATILE_HASHTABLE(p)    ((volatile stm_hashtable_t *)(p))
+#define VOLATILE_TABLE(p)  ((volatile stm_hashtable_table_t *)(p))
+
+static void _insert_clean(stm_hashtable_table_t *table,
+                          stm_hashtable_entry_t *entry,
+                          uintptr_t index)
+{
+    uintptr_t mask = table->mask;
+    uintptr_t i = index & mask;
+    if (table->items[i] == NULL) {
+        table->items[i] = entry;
+        return;
+    }
+
+    uintptr_t perturb = index;
+    while (1) {
+        i = (i << 2) + i + perturb + 1;
+        i &= mask;
+        if (table->items[i] == NULL) {
+            table->items[i] = entry;
+            return;
+        }
+
+        perturb >>= PERTURB_SHIFT;
+    }
+}
+
+static void _stm_rehash_hashtable(stm_hashtable_t *hashtable,
+                                  uintptr_t biggercount,
+                                  char *segment_base)
+{
+    dprintf(("rehash %p to size %ld, segment_base=%p\n",
+             hashtable, biggercount, segment_base));
+
+    size_t size = (offsetof(stm_hashtable_table_t, items)
+                   + biggercount * sizeof(stm_hashtable_entry_t *));
+    stm_hashtable_table_t *biggertable = malloc(size);
+    assert(biggertable);   // XXX
+
+    stm_hashtable_table_t *table = hashtable->table;
+    table->resize_counter = (uintptr_t)biggertable;
+    /* ^^^ this unlocks the table by writing a non-zero value to
+       table->resize_counter, but the new value is a pointer to the
+       new bigger table, so IS_EVEN() is still true */
+    assert(IS_EVEN(table->resize_counter));
+
+    init_table(biggertable, biggercount);
+
+    uintptr_t j, mask = table->mask;
+    uintptr_t rc = biggertable->resize_counter;
+    for (j = 0; j <= mask; j++) {
+        stm_hashtable_entry_t *entry = table->items[j];
+        if (entry == NULL)
+            continue;
+        if (segment_base != NULL) {
+            if (((struct stm_hashtable_entry_s *)
+                       REAL_ADDRESS(segment_base, entry))->object == NULL &&
+                   !_stm_was_read_by_anybody((object_t *)entry)) {
+                dprintf(("  removing dead %p\n", entry));
+                continue;
+            }
+        }
+
+        uintptr_t eindex;
+        if (segment_base == NULL)
+            eindex = entry->index;   /* read from STM_SEGMENT */
+        else
+            eindex = ((struct stm_hashtable_entry_s *)
+                       REAL_ADDRESS(segment_base, entry))->index;
+
+        dprintf(("  insert_clean %p at index=%ld\n",
+                 entry, eindex));
+        _insert_clean(biggertable, entry, eindex);
+        assert(rc > 6);
+        rc -= 6;
+    }
+    biggertable->resize_counter = rc;
+
+    write_fence();   /* make sure that 'biggertable' is valid here,
+                        and make sure 'table->resize_counter' is updated
+                        ('table' must be immutable from now on). */
+    VOLATILE_HASHTABLE(hashtable)->table = biggertable;
+}
+
+stm_hashtable_entry_t *stm_hashtable_lookup(object_t *hashtableobj,
+                                            stm_hashtable_t *hashtable,
+                                            uintptr_t index)
+{
+    stm_hashtable_table_t *table;
+    uintptr_t mask;
+    uintptr_t i;
+    stm_hashtable_entry_t *entry;
+
+ restart:
+    /* classical dict lookup logic */
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    mask = table->mask;      /* read-only field */
+    i = index & mask;
+    entry = VOLATILE_TABLE(table)->items[i];
+    if (entry != NULL) {
+        if (entry->index == index)
+            return entry;           /* found at the first try */
+
+        uintptr_t perturb = index;
+        while (1) {
+            i = (i << 2) + i + perturb + 1;
+            i &= mask;
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                if (entry->index == index)
+                    return entry;    /* found */
+            }
+            else
+                break;
+            perturb >>= PERTURB_SHIFT;
+        }
+    }
+    /* here, we didn't find the 'entry' with the correct index.  Note
+       that even if the same 'table' is modified or resized by other
+       threads concurrently, any new item found from a race condition
+       would anyway contain NULL in the present segment (ensured by
+       the first write_fence() below).  If the 'table' grows an entry
+       just after we checked above, then we go ahead and lock the
+       table; but after we get the lock, we will notice the new entry
+       (ensured by the second write_fence() below) and restart the
+       whole process.
+     */
+
+    uintptr_t rc = VOLATILE_TABLE(table)->resize_counter;
+
+    /* if rc is RESIZING_LOCK (which is 0, so even), a concurrent thread
+       is writing to the hashtable.  Or, if rc is another even number, it is
+       actually a pointer to the next version of the table, installed
+       just now.  In both cases, this thread must simply spin loop.
+    */
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+    /* in the other cases, we need to grab the RESIZING_LOCK.
+     */
+    if (!__sync_bool_compare_and_swap(&table->resize_counter,
+                                      rc, RESIZING_LOCK)) {
+        goto restart;
+    }
+    /* we now have the lock.  The only table with a non-even value of
+       'resize_counter' should be the last one in the chain, so if we
+       succeeded in locking it, check this. */
+    assert(table == hashtable->table);
+
+    /* Check that 'table->items[i]' is still NULL,
+       i.e. hasn't been populated under our feet.
+    */
+    if (table->items[i] != NULL) {
+        table->resize_counter = rc;    /* unlock */
+        goto restart;
+    }
+    /* if rc is greater than 6, there is enough room for a new
+       item in the current table.
+    */
+    if (rc > 6) {
+        /* we can only enter here once!  If we allocate stuff, we may
+           run the GC, and so 'hashtableobj' might move afterwards. */
+        if (_is_in_nursery(hashtableobj)) {
+            entry = (stm_hashtable_entry_t *)
+                stm_allocate(sizeof(stm_hashtable_entry_t));
+            entry->userdata = stm_hashtable_entry_userdata;
+            entry->index = index;
+            entry->object = NULL;
+        }
+        else {
+            /* for a non-nursery 'hashtableobj', we pretend that the
+               'entry' object we're about to return was already
+               existing all along, with NULL in all segments.  If the
+               caller of this function is going to modify the 'object'
+               field, it will call stm_write(entry) first, which will
+               correctly schedule 'entry' for write propagation.  We
+               do that even if 'hashtableobj' was created by the
+               running transaction: the new 'entry' object is created
+               as if it was older than the transaction.
+
+               Note the following difference: if 'hashtableobj' is
+               still in the nursery (case above), the 'entry' object
+               is also allocated from the nursery, and after a minor
+               collection it ages as an old-but-created-by-the-
+               current-transaction object.  We could try to emulate
+               this here, or to create young 'entry' objects, but
+               doing either of these would require careful
+               synchronization with other pieces of the code that may
+               change.
+            */
+            struct stm_hashtable_entry_s initial = {
+                .userdata = stm_hashtable_entry_userdata,
+                .index = index,
+                .object = NULL
+            };
+            entry = (stm_hashtable_entry_t *)
+                stm_allocate_preexisting(sizeof(stm_hashtable_entry_t),
+                                         (char *)&initial.header);
+            hashtable->additions++;
+        }
+        table->items[i] = entry;
+        write_fence();     /* make sure 'table->items' is written here */
+        VOLATILE_TABLE(table)->resize_counter = rc - 6;    /* unlock */
+        return entry;
+    }
+    else {
+        /* rc is odd and not greater than 6 (hence at most 5): we must
+           allocate a new bigger table.
+         */
+        uintptr_t biggercount = table->mask + 1;
+        if (biggercount < 50000)
+            biggercount *= 4;
+        else
+            biggercount *= 2;
+        _stm_rehash_hashtable(hashtable, biggercount, /*segment_base=*/NULL);
+        goto restart;
+    }
+}
+
+object_t *stm_hashtable_read(object_t *hobj, stm_hashtable_t *hashtable,
+                             uintptr_t key)
+{
+    stm_hashtable_entry_t *e = stm_hashtable_lookup(hobj, hashtable, key);
+    stm_read((object_t *)e);
+    return e->object;
+}
+
+void stm_hashtable_write_entry(object_t *hobj, stm_hashtable_entry_t *entry,
+                               object_t *nvalue)
+{
+    if (_STM_WRITE_CHECK_SLOWPATH((object_t *)entry)) {
+
+        stm_write((object_t *)entry);
+
+        uintptr_t i = list_count(STM_PSEGMENT->modified_old_objects);
+        if (i > 0 && list_item(STM_PSEGMENT->modified_old_objects, i - 3)
+                     == (uintptr_t)entry) {
+            /* The stm_write() above recorded a write to 'entry'.  Here,
+               we add another stm_undo_s to modified_old_objects with
+               TYPE_MODIFIED_HASHTABLE.  It is ignored everywhere except
+               in _stm_validate().
+
+               The goal is that this TYPE_MODIFIED_HASHTABLE ends up in
+               the commit log's 'cl_written' array.  Later, another
+               transaction validating that log will check two things:
+
+               - the regular stm_undo_s entry put by stm_write() above
+                 will make the other transaction check that it didn't
+                 read the same 'entry' object;
+
+                 - the TYPE_MODIFIED_HASHTABLE entry we're adding now
+                   will make the other transaction check that it didn't
+                   do any stm_hashtable_list() on the complete hashtable.
+            */
+            STM_PSEGMENT->modified_old_objects = list_append3(
+                STM_PSEGMENT->modified_old_objects,
+                TYPE_POSITION_MARKER,      /* type1 */
+                TYPE_MODIFIED_HASHTABLE,   /* type2 */
+                (uintptr_t)hobj);          /* modif_hashtable */
+        }
+    }
+    entry->object = nvalue;
+}
+
+void stm_hashtable_write(object_t *hobj, stm_hashtable_t *hashtable,
+                         uintptr_t key, object_t *nvalue,
+                         stm_thread_local_t *tl)
+{
+    STM_PUSH_ROOT(*tl, nvalue);
+    STM_PUSH_ROOT(*tl, hobj);
+    stm_hashtable_entry_t *e = stm_hashtable_lookup(hobj, hashtable, key);
+    STM_POP_ROOT(*tl, hobj);
+    STM_POP_ROOT(*tl, nvalue);
+    stm_hashtable_write_entry(hobj, e, nvalue);
+}
+
+long stm_hashtable_length_upper_bound(stm_hashtable_t *hashtable)
+{
+    stm_hashtable_table_t *table;
+    uintptr_t rc;
+
+ restart:
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    rc = VOLATILE_TABLE(table)->resize_counter;
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+
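+    /* reader's note: rc is odd here.  Example: a table with
+       mask + 1 == 8 starts at rc == 33; if rc is now 21, then
+       (33 - 21) / 6 == 2 entries were added to it. */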
+    uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+    uintptr_t num_entries_times_6 = initial_rc - rc;
+    return num_entries_times_6 / 6;
+}
+
+long stm_hashtable_list(object_t *hobj, stm_hashtable_t *hashtable,
+                        stm_hashtable_entry_t **results)
+{
+    /* Set the read marker.  It will be left as long as we're running
+       the same transaction.
+    */
+    stm_read(hobj);
+
+    /* Get the table.  No synchronization is needed: we may miss some
+       entries that are being added, but they would contain NULL in
+       this segment anyway. */
+    stm_hashtable_table_t *table = VOLATILE_HASHTABLE(hashtable)->table;
+
+    /* Read all entries, check which ones are not NULL, count them,
+       and optionally list them in 'results'.
+    */
+    uintptr_t i, mask = table->mask;
+    stm_hashtable_entry_t *entry;
+    long nresult = 0;
+
+    if (results != NULL) {
+        /* collect the results in the provided list */
+        for (i = 0; i <= mask; i++) {
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                stm_read((object_t *)entry);
+                if (entry->object != NULL)
+                    results[nresult++] = entry;
+            }
+        }
+    }
+    else {
+        /* don't collect, just get the exact number of results */
+        for (i = 0; i <= mask; i++) {
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                stm_read((object_t *)entry);
+                if (entry->object != NULL)
+                    nresult++;
+            }
+        }
+    }
+    return nresult;
+}
+
+static void _stm_compact_hashtable(struct object_s *hobj,
+                                   stm_hashtable_t *hashtable)
+{
+    stm_hashtable_table_t *table = hashtable->table;
+    uintptr_t rc = table->resize_counter;
+    assert(!IS_EVEN(rc));
+
+    if (hashtable->additions * 4 > table->mask) {
+        hashtable->additions = 0;
+
+        /* If 'hobj' was created in some current transaction, i.e. if it is
+           now an overflow object, then we have the risk that some of its
+           entry objects were not created with stm_allocate_preexisting().
+           In that situation, a valid workaround is to read all entry
+           objects in the segment of the running transaction.  Otherwise,
+           the base case is to read them all from segment zero.
+        */
+        long segnum = get_num_segment_containing_address((char *)hobj);
+        if (!IS_OVERFLOW_OBJ(get_priv_segment(segnum), hobj))
+            segnum = 0;
+
+        uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+        uintptr_t num_entries_times_6 = initial_rc - rc;
+        uintptr_t count = INITIAL_HASHTABLE_SIZE;
+        while (count * 4 < num_entries_times_6)
+            count *= 2;
+        /* sanity-check: 'num_entries_times_6 < initial_rc', and so 'count'
+           can never grow larger than the current table size. */
+        assert(count <= table->mask + 1);
+
+        dprintf(("compact with %ld items:\n", num_entries_times_6 / 6));
+        _stm_rehash_hashtable(hashtable, count, get_segment_base(segnum));
+    }
+
+    table = hashtable->table;
+    assert(!IS_EVEN(table->resize_counter));
+
+    if (table != &hashtable->initial_table) {
+        uintptr_t rc = hashtable->initial_table.resize_counter;
+        while (1) {
+            assert(IS_EVEN(rc));
+            assert(rc != RESIZING_LOCK);
+
+            stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc;
+            if (old_table == table)
+                break;
+            rc = old_table->resize_counter;
+            free(old_table);
+        }
+        hashtable->initial_table.resize_counter = (uintptr_t)table;
+        assert(IS_EVEN(hashtable->initial_table.resize_counter));
+    }
+}
+
+void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
+                           void trace(object_t **))
+{
+    if (trace == TRACE_FOR_MAJOR_COLLECTION)
+        _stm_compact_hashtable(hobj, hashtable);
+
+    stm_hashtable_table_t *table;
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+
+    uintptr_t j, mask = table->mask;
+    for (j = 0; j <= mask; j++) {
+        stm_hashtable_entry_t *volatile *pentry;
+        pentry = &VOLATILE_TABLE(table)->items[j];
+        if (*pentry != NULL) {
+            trace((object_t **)pentry);
+        }
+    }
+}
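As a usage illustration (a sketch, not from the patch; 'hobj' is assumed to be an object_t embedding the stm_hashtable_t, 'tl' the current stm_thread_local_t, and 'value_obj' some object), the public API declared in stmgc.h composes like this:

    /* sketch: basic use of the new hashtable API */
    stm_hashtable_t *h = stm_hashtable_create();
    stm_hashtable_write(hobj, h, 12345, value_obj, tl);  /* store */
    object_t *v = stm_hashtable_read(hobj, h, 12345);    /* read back */

    long upper = stm_hashtable_length_upper_bound(h);
    stm_hashtable_entry_t *entries[upper];
    long count = stm_hashtable_list(hobj, h, entries);   /* marks hobj read */
    /* stm_hashtable_free(h) must be called explicitly later, e.g. from
       a light finalizer of 'hobj' */
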
diff --git a/c8/stm/marker.c b/c8/stm/marker.c
--- a/c8/stm/marker.c
+++ b/c8/stm/marker.c
@@ -42,7 +42,8 @@
     */
     while (contention != start) {
         --contention;
-        if (contention->type == TYPE_POSITION_MARKER) {
+        if (contention->type == TYPE_POSITION_MARKER &&
+            contention->type2 != TYPE_MODIFIED_HASHTABLE) {
             out_marker->odd_number = contention->marker_odd_number;
             out_marker->object = contention->marker_object;
             return;
@@ -69,6 +70,9 @@
             return;    /* already up-to-date */
     }
 
+    /* -2 is not odd */
+    assert(marker.odd_number != (uintptr_t)TYPE_MODIFIED_HASHTABLE);
+
     STM_PSEGMENT->position_markers_last = list_count(list);
     STM_PSEGMENT->modified_old_objects = list_append3(
         list,
diff --git a/c8/stm/misc.c b/c8/stm/misc.c
--- a/c8/stm/misc.c
+++ b/c8/stm/misc.c
@@ -31,10 +31,7 @@
 
 bool _stm_was_read(object_t *obj)
 {
-    uint8_t rm = ((struct stm_read_marker_s *)
-                  (STM_SEGMENT->segment_base + (((uintptr_t)obj) >> 4)))->rm;
-    assert(rm <= STM_SEGMENT->transaction_read_version);
-    return rm == STM_SEGMENT->transaction_read_version;
+    return was_read_remote(STM_SEGMENT->segment_base, obj);
 }
 
 bool _stm_was_written(object_t *obj)
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -424,6 +424,7 @@
     struct stm_undo_s *end = (struct stm_undo_s *)(list->items + list->count);
 
     for (; undo < end; undo++) {
+        /* this logic also works if type2 == TYPE_MODIFIED_HASHTABLE */
         if (undo->type == TYPE_POSITION_MARKER)
             minor_trace_if_young(&undo->marker_object);
     }
diff --git a/c8/stm/pages.h b/c8/stm/pages.h
--- a/c8/stm/pages.h
+++ b/c8/stm/pages.h
@@ -62,7 +62,11 @@
 
 static inline bool get_page_status_in(long segnum, uintptr_t pagenum)
 {
-    /* reading page status requires "read"-lock: */
+    /* reading page status requires "read"-lock, which is defined as
+       "any segment has the privatization_lock".  This is enough to
+       prevent the "write"-lock from being acquired by somebody else
+       (defined as "_all_ segments have the privatization_lock").
+    */
     assert(STM_PSEGMENT->privatization_lock);
 
     OPT_ASSERT(segnum < 8 * sizeof(struct page_shared_s));
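Reader's note (not part of the patch), restating the locking convention the comment above relies on:

    /* "read" lock:  acquire your own segment's privatization_lock;
       "write" lock: acquire the privatization_lock of *all* segments,
       which cannot happen while any single reader holds its own. */
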
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -250,8 +250,6 @@
     set_gs_register(get_segment_base(num + 1));
     s_mutex_unlock();
 
-    DEBUG_EXPECT_SEGFAULT(true);
-
     if (num == 0) {
         dprintf(("STM_GC_NURSERY: %d\n", STM_GC_NURSERY));
         dprintf(("NB_PAGES: %d\n", NB_PAGES));
diff --git a/c8/stm/setup.h b/c8/stm/setup.h
--- a/c8/stm/setup.h
+++ b/c8/stm/setup.h
@@ -3,8 +3,8 @@
 static pthread_t *_get_cpth(stm_thread_local_t *);
 
 #ifndef NDEBUG
-static __thread long _stm_segfault_expected = 0;
-#define DEBUG_EXPECT_SEGFAULT(v) do {if (v) _stm_segfault_expected++; else _stm_segfault_expected--;} while (0)
+static __thread long _stm_segfault_expected = 1;
+#define DEBUG_EXPECT_SEGFAULT(v) do {if (v) _stm_segfault_expected++; else _stm_segfault_expected--; assert(_stm_segfault_expected <= 1);} while (0)
 #else
 #define DEBUG_EXPECT_SEGFAULT(v) {}
 #endif
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -39,3 +39,4 @@
 #include "stm/prof.c"
 #include "stm/rewind_setjmp.c"
 #include "stm/finalizer.c"
+#include "stm/hashtable.c"
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -196,10 +196,13 @@
         STM_SEGMENT->transaction_read_version;
 }
 
+#define _STM_WRITE_CHECK_SLOWPATH(obj)  \
+    UNLIKELY(((obj)->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)
+
 __attribute__((always_inline))
 static inline void stm_write(object_t *obj)
 {
-    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0))
+    if (_STM_WRITE_CHECK_SLOWPATH(obj))
         _stm_write_slowpath(obj);
 }
 
@@ -208,7 +211,7 @@
 static inline void stm_write_card(object_t *obj, uintptr_t index)
 {
     /* if GCFLAG_WRITE_BARRIER is set, then don't do anything more. */
-    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)) {
+    if (_STM_WRITE_CHECK_SLOWPATH(obj)) {
 
         /* GCFLAG_WRITE_BARRIER is not set.  This might be because
            it's the first time we see a given small array; or it might
@@ -479,6 +482,38 @@
 /* dummies for now: */
 static inline void stm_flush_timing(stm_thread_local_t *tl, int verbose) {}
 
+
+/* Hashtables.  Keys are 64-bit unsigned integers, values are
+   'object_t *'.  Note that the type 'stm_hashtable_t' is not an
+   object type at all; you need to allocate and free it explicitly.
+   If you want to embed the hashtable inside an 'object_t' you
+   probably need a light finalizer to do the freeing. */
+typedef struct stm_hashtable_s stm_hashtable_t;
+typedef TLPREFIX struct stm_hashtable_entry_s stm_hashtable_entry_t;
+
+stm_hashtable_t *stm_hashtable_create(void);
+void stm_hashtable_free(stm_hashtable_t *);
+stm_hashtable_entry_t *stm_hashtable_lookup(object_t *, stm_hashtable_t *,
+                                            uintptr_t key);
+object_t *stm_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
+void stm_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
+                         object_t *nvalue, stm_thread_local_t *);
+void stm_hashtable_write_entry(object_t *hobj, stm_hashtable_entry_t *entry,
+                               object_t *nvalue);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+                        stm_hashtable_entry_t **results);
+extern uint32_t stm_hashtable_entry_userdata;
+void stm_hashtable_tracefn(struct object_s *, stm_hashtable_t *,
+                           void (object_t **));
+
+struct stm_hashtable_entry_s {
+    struct object_s header;
+    uint32_t userdata;
+    uintptr_t index;
+    object_t *object;
+};
+
 /* ==================== END ==================== */
 
 static void (*stmcb_expand_marker)(char *segment_base, uintptr_t odd_number,
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -197,6 +197,26 @@
 void stm_enable_light_finalizer(object_t *);
 
 void (*stmcb_finalizer)(object_t *);
+
+typedef struct stm_hashtable_s stm_hashtable_t;
+typedef ... stm_hashtable_entry_t;
+stm_hashtable_t *stm_hashtable_create(void);
+void stm_hashtable_free(stm_hashtable_t *);
+bool _check_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
+object_t *hashtable_read_result;
+bool _check_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
+                            object_t *nvalue, stm_thread_local_t *tl);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+                        stm_hashtable_entry_t **results);
+uint32_t stm_hashtable_entry_userdata;
+void stm_hashtable_tracefn(struct object_s *, stm_hashtable_t *,
+                           void trace(object_t **));
+
+void _set_hashtable(object_t *obj, stm_hashtable_t *h);
+stm_hashtable_t *_get_hashtable(object_t *obj);
+uintptr_t _get_entry_index(stm_hashtable_entry_t *entry);
+object_t *_get_entry_object(stm_hashtable_entry_t *entry);
 """)
 
 
@@ -299,6 +319,19 @@
     CHECKED(stm_validate());
 }
 
+object_t *hashtable_read_result;
+
+bool _check_hashtable_read(object_t *hobj, stm_hashtable_t *h, uintptr_t key)
+{
+    CHECKED(hashtable_read_result = stm_hashtable_read(hobj, h, key));
+}
+
+bool _check_hashtable_write(object_t *hobj, stm_hashtable_t *h, uintptr_t key,
+                            object_t *nvalue, stm_thread_local_t *tl)
+{
+    CHECKED(stm_hashtable_write(hobj, h, key, nvalue, tl));
+}
+
 #undef CHECKED
 
 
@@ -326,6 +359,32 @@
     return *WEAKREF_PTR(obj, size);
 }
 
+void _set_hashtable(object_t *obj, stm_hashtable_t *h)
+{
+    stm_char *field_addr = ((stm_char*)obj);
+    field_addr += SIZEOF_MYOBJ; /* header */
+    *(stm_hashtable_t *TLPREFIX *)field_addr = h;
+}
+
+stm_hashtable_t *_get_hashtable(object_t *obj)
+{
+    stm_char *field_addr = ((stm_char*)obj);
+    field_addr += SIZEOF_MYOBJ; /* header */
+    return *(stm_hashtable_t *TLPREFIX *)field_addr;
+}
+
+uintptr_t _get_entry_index(stm_hashtable_entry_t *entry)
+{
+    stm_read((object_t *)entry);
+    return entry->index;
+}
+
+object_t *_get_entry_object(stm_hashtable_entry_t *entry)
+{
+    stm_read((object_t *)entry);
+    return entry->object;
+}
+
 void _set_ptr(object_t *obj, int n, object_t *v)
 {
     long nrefs = (long)((myobj_t*)obj)->type_id - 421420;
@@ -351,11 +410,17 @@
 }
 
 
-
 ssize_t stmcb_size_rounded_up(struct object_s *obj)
 {
     struct myobj_s *myobj = (struct myobj_s*)obj;
+    assert(myobj->type_id != 0);
     if (myobj->type_id < 421420) {
+        if (myobj->type_id == 421419) {    /* hashtable */
+            return sizeof(struct myobj_s) + 1 * sizeof(void*);
+        }
+        if (myobj->type_id == 421418) {    /* hashtable entry */
+            return sizeof(struct stm_hashtable_entry_s);
+        }
         /* basic case: tid equals 42 plus the size of the object */
         assert(myobj->type_id >= 42 + sizeof(struct myobj_s));
         assert((myobj->type_id - 42) >= 16);
@@ -371,11 +436,21 @@
     }
 }
 
-
 void stmcb_trace(struct object_s *obj, void visit(object_t **))
 {
     int i;
     struct myobj_s *myobj = (struct myobj_s*)obj;
+    if (myobj->type_id == 421419) {
+        /* hashtable */
+        stm_hashtable_t *h = *((stm_hashtable_t **)(myobj + 1));
+        stm_hashtable_tracefn(obj, h, visit);
+        return;
+    }
+    if (myobj->type_id == 421418) {
+        /* hashtable entry */
+        object_t **ref = &((struct stm_hashtable_entry_s *)myobj)->object;
+        visit(ref);
+    }
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
@@ -396,6 +471,7 @@
 {
     int i;
     struct myobj_s *myobj = (struct myobj_s*)obj;
+    assert(myobj->type_id != 0);
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     if (myobj->type_id < 421420) {
@@ -413,6 +489,9 @@
                                   uintptr_t offset_itemsize[2])
 {
     struct myobj_s *myobj = (struct myobj_s*)obj;
+    assert(myobj->type_id != 0);
+    assert(myobj->type_id != 421419);
+    assert(myobj->type_id != 421418);
     if (myobj->type_id < 421420) {
         offset_itemsize[0] = SIZEOF_MYOBJ;
         offset_itemsize[1] = 1;
@@ -468,6 +547,7 @@
 CARD_CLEAR = 0
 CARD_MARKED = lib._STM_CARD_MARKED
 CARD_MARKED_OLD = lib._stm_get_transaction_read_version
+lib.stm_hashtable_entry_userdata = 421418
 
 
 class Conflict(Exception):
@@ -530,6 +610,18 @@
     lib._set_type_id(o, tid)
     return o
 
+def stm_allocate_hashtable():
+    o = lib.stm_allocate(16)
+    tid = 421419
+    lib._set_type_id(o, tid)
+    h = lib.stm_hashtable_create()
+    lib._set_hashtable(o, h)
+    return o
+
+def get_hashtable(o):
+    assert lib._get_type_id(o) == 421419
+    return lib._get_hashtable(o)
+
 def stm_get_weakref(o):
     return lib._get_weakref(o)
 
diff --git a/c8/test/test_hashtable.py b/c8/test/test_hashtable.py
new file mode 100644
--- /dev/null
+++ b/c8/test/test_hashtable.py
@@ -0,0 +1,561 @@
+from support import *
+import random
+import py, sys
+
+
+def htget(o, key):
+    h = get_hashtable(o)
+    res = lib._check_hashtable_read(o, h, key)
+    if res:
+        raise Conflict
+    return lib.hashtable_read_result
+
+def htset(o, key, nvalue, tl):
+    h = get_hashtable(o)
+    res = lib._check_hashtable_write(o, h, key, nvalue, tl)
+    if res:
+        raise Conflict
+
+def ht_length_upper_bound(o):
+    h = get_hashtable(o)
+    return lib.stm_hashtable_length_upper_bound(h)
+
+def htitems(o):
+    h = get_hashtable(o)
+    upper_bound = lib.stm_hashtable_length_upper_bound(h)
+    entries = ffi.new("stm_hashtable_entry_t *[]", upper_bound)
+    count = lib.stm_hashtable_list(o, h, entries)
+    assert count <= upper_bound
+    return [(lib._get_entry_index(entries[i]),
+             lib._get_entry_object(entries[i])) for i in range(count)]
+
+def htlen(o):
+    h = get_hashtable(o)
+    count = lib.stm_hashtable_list(o, h, ffi.NULL)
+    return count
+
+
+class BaseTestHashtable(BaseTest):
+
+    def setup_method(self, meth):
+        BaseTest.setup_method(self, meth)
+        #
+        @ffi.callback("void(object_t *)")
+        def light_finalizer(obj):
+            print 'light_finalizer:', obj
+            try:
+                assert lib._get_type_id(obj) == 421419
+                self.seen_hashtables -= 1
+            except:
+                self.errors.append(sys.exc_info()[2])
+                raise
+
+        lib.stmcb_light_finalizer = light_finalizer
+        self._light_finalizer_keepalive = light_finalizer
+        self.seen_hashtables = 0
+        self.errors = []
+
+    def teardown_method(self, meth):
+        BaseTest.teardown_method(self, meth)
+        lib.stmcb_light_finalizer = ffi.NULL
+        assert self.errors == []
+        assert self.seen_hashtables == 0
+
+    def allocate_hashtable(self):
+        h = stm_allocate_hashtable()
+        lib.stm_enable_light_finalizer(h)
+        self.seen_hashtables += 1
+        return h
+
+
+class TestHashtable(BaseTestHashtable):
+
+    def test_empty(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        for i in range(100):
+            index = random.randrange(0, 1<<64)
+            got = htget(h, index)
+            assert got == ffi.NULL
+
+    def test_set_value(self):
+        self.start_transaction()
+        tl0 = self.tls[self.current_thread]
+        h = self.allocate_hashtable()
+        lp1 = stm_allocate(16)
+        htset(h, 12345678901, lp1, tl0)
+        assert htget(h, 12345678901) == lp1
+        for i in range(64):
+            index = 12345678901 ^ (1 << i)
+            assert htget(h, index) == ffi.NULL
+        assert htget(h, 12345678901) == lp1
+
+    def test_no_conflict(self):
+        lp1 = stm_allocate_old(16)
+        lp2 = stm_allocate_old(16)
+        #
+        self.start_transaction()
+        tl0 = self.tls[self.current_thread]
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        stm_set_char(lp1, 'A')
+        htset(h, 1234, lp1, tl0)
+        self.commit_transaction()
+        #
+        self.start_transaction()
+        h = self.pop_root()
+        stm_set_char(lp2, 'B')
+        self.switch(1)
+        self.start_transaction()
+        self.switch(0)
+        htset(h, 9991234, lp2, tl0)
+        #
+        self.switch(1)
+        lp1b = htget(h, 1234)
+        assert lp1b != ffi.NULL
+        assert stm_get_char(lp1b) == 'A'
+        assert lp1b == lp1
+        self.commit_transaction()
+        #
+        self.switch(0)
+        assert htget(h, 9991234) == lp2
+        assert stm_get_char(lp2) == 'B'
+        assert htget(h, 1234) == lp1
+        htset(h, 1234, ffi.NULL, tl0)
+        self.commit_transaction()
+        #
+        self.start_transaction()
+        stm_major_collect()       # to get rid of the hashtable object
+
+    def test_conflict(self):
+        lp1 = stm_allocate_old(16)
+        lp2 = stm_allocate_old(16)
+        #
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        self.commit_transaction()
+        #
+        self.start_transaction()
+        h = self.pop_root()
+        self.push_root(h)
+        tl0 = self.tls[self.current_thread]
+        htset(h, 1234, lp1, tl0)
+        #
+        self.switch(1)
+        self.start_transaction()
+        tl1 = self.tls[self.current_thread]
+        htset(h, 1234, lp2, tl1)
+        #
+        self.switch(0)
+        self.commit_transaction()
+        #
+        py.test.raises(Conflict, self.switch, 1)
+        #
+        self.switch(0)
+        self.start_transaction()
+        self.pop_root()
+        stm_major_collect()       # to get rid of the hashtable object
+        self.commit_transaction()
+
+    def test_keepalive_minor(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        lp1 = stm_allocate(16)
+        stm_set_char(lp1, 'N')
+        tl0 = self.tls[self.current_thread]
+        htset(h, 1234, lp1, tl0)
+        stm_minor_collect()
+        h = self.pop_root()
+        lp1b = htget(h, 1234)
+        assert lp1b != ffi.NULL
+        assert stm_get_char(lp1b) == 'N'
+        assert lp1b != lp1
+
+    def test_keepalive_major(self):
+        lp1 = stm_allocate_old(16)
+        #
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        stm_set_char(lp1, 'N')
+        tl0 = self.tls[self.current_thread]
+        htset(h, 1234, lp1, tl0)
+        self.commit_transaction()
+        #
+        self.start_transaction()
+        stm_major_collect()
+        h = self.pop_root()
+        lp1b = htget(h, 1234)
+        assert lp1b == lp1
+        assert stm_get_char(lp1b) == 'N'
+        #
+        stm_major_collect()       # to get rid of the hashtable object
+        self.commit_transaction()
+
+    def test_minor_collect_bug1(self):
+        self.start_transaction()
+        lp1 = stm_allocate(32)
+        self.push_root(lp1)
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        stm_minor_collect()
+        h = self.pop_root()
+        lp1 = self.pop_root()
+        print 'h', h                       # 0xa040010
+        print 'lp1', lp1                   # 0xa040040
+        tl0 = self.tls[self.current_thread]
+        htset(h, 1, lp1, tl0)
+        self.commit_transaction()
+        #
+        self.start_transaction()
+        assert htget(h, 1) == lp1
+        stm_major_collect()       # to get rid of the hashtable object
+
+    def test_minor_collect_bug1_different_thread(self):
+        self.start_transaction()
+        lp1 = stm_allocate(32)
+        self.push_root(lp1)
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        stm_minor_collect()
+        h = self.pop_root()
+        lp1 = self.pop_root()
+        print 'h', h                       # 0xa040010
+        print 'lp1', lp1                   # 0xa040040
+        tl0 = self.tls[self.current_thread]
+        htset(h, 1, lp1, tl0)
+        self.commit_transaction()
+        #
+        self.switch(1)            # in a different thread
+        self.start_transaction()
+        assert htget(h, 1) == lp1
+        stm_major_collect()       # to get rid of the hashtable object
+
+    def test_major_collect_bug2(self):
+        self.start_transaction()
+        lp1 = stm_allocate(24)
+        self.push_root(lp1)
+        self.commit_transaction()
+        lp1 = self.pop_root()
+        #
+        self.switch(1)
+        self.start_transaction()
+        stm_write(lp1)            # force this page to be shared
+        #
+        self.switch(0)
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        htset(h, 10, stm_allocate(32), tl0)
+        htset(h, 11, stm_allocate(32), tl0)
+        htset(h, 12, stm_allocate(32), tl0)
+        self.push_root(h)
+        #
+        self.switch(1)            # in a different thread
+        stm_major_collect()       # force a _stm_rehash_hashtable()
+        #
+        self.switch(0)            # back to the original thread
+        h = self.pop_root()
+        assert htget(h, 10) != ffi.NULL
+        assert htget(h, 11) != ffi.NULL
+        assert htget(h, 12) != ffi.NULL
+
+    def test_list_1(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        for i in range(32):
+            assert ht_length_upper_bound(h) == i
+            htset(h, 19 ^ i, stm_allocate(32), tl0)
+        assert ht_length_upper_bound(h) == 32
+
+    def test_list_2(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        lst = htitems(h)
+        assert len(lst) == 29
+        assert sorted(lst) == sorted(expected)
+
+    def test_list_3(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        for i in range(29):
+            htset(h, 19 ^ i, stm_allocate(32), tl0)
+        assert htlen(h) == 29
+
+    def test_len_conflicts_with_additions(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        self.commit_transaction()
+        #
+        self.start_transaction()
+        h = self.pop_root()
+        self.push_root(h)
+        tl0 = self.tls[self.current_thread]
+        htset(h, 10, stm_allocate(32), tl0)
+        #
+        self.switch(1)
+        self.start_transaction()
+        assert htlen(h) == 0
+        #
+        self.switch(0)
+        self.commit_transaction()
+        #
+        py.test.raises(Conflict, self.switch, 1)
+        #
+        self.switch(0)
+        self.start_transaction()
+        self.pop_root()
+        stm_major_collect()       # to get rid of the hashtable object
+        self.commit_transaction()
+
+    def test_grow_without_conflict(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        self.push_root(h)
+        self.commit_transaction()
+        h = self.pop_root()
+        self.push_root(h)
+        #
+        STEPS = 50
+        for i in range(STEPS):
+            self.switch(1)
+            self.start_transaction()
+            tl0 = self.tls[self.current_thread]
+            htset(h, i + STEPS, stm_allocate(32), tl0)
+            #
+            self.switch(0)
+            self.start_transaction()
+            tl0 = self.tls[self.current_thread]
+            htset(h, i, stm_allocate(24), tl0)
+            #
+            self.switch(1)
+            self.commit_transaction()
+            #
+            self.switch(0)
+            self.commit_transaction()
+        #
+        self.pop_root()
+        self.start_transaction()
+        stm_major_collect()       # to get rid of the hashtable object
+
+
+class TestRandomHashtable(BaseTestHashtable):
+
+    def setup_method(self, meth):
+        BaseTestHashtable.setup_method(self, meth)
+        self.values = []
+        self.mirror = None
+        self.roots = []
+        self.other_thread = ([], [])
+
+    def push_roots(self):
+        assert self.roots is None
+        self.roots = []
+        for k, hitems in self.mirror.items():
+            assert lib._get_type_id(k) == 421419
+            for key, value in hitems.items():
+                assert lib._get_type_id(value) < 1000
+                self.push_root(value)
+                self.roots.append(key)
+            self.push_root(k)
+            self.roots.append(None)
+        for v in self.values:
+            self.push_root(v)
+        self.mirror = None
+
+    def pop_roots(self):
+        assert self.mirror is None
+        for i in reversed(range(len(self.values))):
+            self.values[i] = self.pop_root()
+            assert stm_get_char(self.values[i]) == chr((i + 1) & 255)
+        self.mirror = {}
+        for r in reversed(self.roots):
+            obj = self.pop_root()
+            if r is None:
+                assert lib._get_type_id(obj) == 421419
+                self.mirror[obj] = curhitems = {}
+            else:
+                assert lib._get_type_id(obj) < 1000
+                curhitems[r] = obj
+        self.roots = None
+
+    def exchange_threads(self):
+        old_thread = (self.values, self.roots)
+        self.switch(1 - self.current_thread)
+        (self.values, self.roots) = self.other_thread
+        self.mirror = None
+        self.other_thread = old_thread
+        print "----- switch to %s -----" % (self.current_thread,)
+
+    def test_random_single_thread(self):
+        import random
+        #
+        for i in range(100):
+            print "start_transaction"
+            self.start_transaction()
+            self.pop_roots()
+            for j in range(10):
+                r = random.random()
+                if r < 0.05:
+                    h = self.allocate_hashtable()
+                    print "allocate_hashtable ->", h
+                    self.mirror[h] = {}
+                elif r < 0.10:
+                    print "stm_minor_collect"
+                    self.push_roots()
+                    stm_minor_collect()
+                    self.pop_roots()
+                elif r < 0.11:
+                    print "stm_major_collect"
+                    self.push_roots()
+                    stm_major_collect()
+                    self.pop_roots()
+                elif r < 0.5:
+                    if not self.mirror: continue
+                    h = random.choice(self.mirror.keys())
+                    if not self.mirror[h]: continue
+                    key = random.choice(self.mirror[h].keys())
+                    value = self.mirror[h][key]
+                    print "htget(%r, %r) == %r" % (h, key, value)
+                    self.push_roots()
+                    self.push_root(value)
+                    result = htget(h, key)
+                    value = self.pop_root()
+                    assert result == value
+                    self.pop_roots()
+                elif r < 0.6:
+                    if not self.mirror: continue
+                    h = random.choice(self.mirror.keys())
+                    key = random.randrange(0, 40)
+                    if key in self.mirror[h]: continue
+                    print "htget(%r, %r) == NULL" % (h, key)
+                    self.push_roots()
+                    assert htget(h, key) == ffi.NULL
+                    self.pop_roots()
+                elif r < 0.63:
+                    if not self.mirror: continue
+                    h, _ = self.mirror.popitem()
+                    print "popped", h
+                elif r < 0.75:
+                    obj = stm_allocate(32)
+                    self.values.append(obj)
+                    stm_set_char(obj, chr(len(self.values) & 255))
+                else:
+                    if not self.mirror or not self.values: continue
+                    h = random.choice(self.mirror.keys())
+                    key = random.randrange(0, 32)
+                    value = random.choice(self.values)
+                    print "htset(%r, %r, %r)" % (h, key, value)
+                    self.push_roots()
+                    tl = self.tls[self.current_thread]
+                    htset(h, key, value, tl)
+                    self.pop_roots()
+                    self.mirror[h][key] = value
+            self.push_roots()
+            print "commit_transaction"
+            self.commit_transaction()
+        #
+        self.start_transaction()
+        self.become_inevitable()
+        self.pop_roots()
+        stm_major_collect()       # to get rid of the hashtable objects
+
+    def test_random_multiple_threads(self):
+        from random import randrange, Random
+        seed = randrange(0, 10000)
+        print "----------------------------------------- seed:", seed
+        random = Random(seed)
+        #
+        self.start_transaction()
+        self.exchange_threads()
+        self.start_transaction()
+        self.pop_roots()
+        #
+        for j in range(1000):
+            r = random.random()
+            if r > 0.9:
+                if r > 0.95:
+                    self.push_roots()
+                    self.commit_transaction()
+                    self.start_transaction()
+                    self.pop_roots()
+                else:
+                    self.push_roots()
+                    self.exchange_threads()
+                    self.pop_roots()
+                continue
+
+            if r < 0.05:
+                h = self.allocate_hashtable()
+                print "allocate_hashtable -> %r/%r" % (h, get_hashtable(h))
+                self.mirror[h] = {}
+            elif r < 0.10:
+                print "stm_minor_collect"
+                self.push_roots()
+                stm_minor_collect()
+                self.pop_roots()
+            elif r < 0.11:
+                print "stm_major_collect"
+                self.push_roots()
+                stm_major_collect()
+                self.pop_roots()
+            elif r < 0.5:
+                if not self.mirror: continue
+                h = random.choice(self.mirror.keys())
+                if not self.mirror[h]: continue
+                key = random.choice(self.mirror[h].keys())
+                value = self.mirror[h][key]
+                print "htget(%r/%r, %r) == %r" % (h, get_hashtable(h), key, value)
+                self.push_roots()
+                self.push_root(value)
+                result = htget(h, key)
+                value = self.pop_root()
+                assert result == value
+                self.pop_roots()
+            elif r < 0.6:
+                if not self.mirror: continue
+                h = random.choice(self.mirror.keys())
+                key = random.randrange(0, 40)
+                if key in self.mirror[h]: continue
+                print "htget(%r/%r, %r) == NULL" % (h, get_hashtable(h), key)
+                self.push_roots()
+                assert htget(h, key) == ffi.NULL
+                self.pop_roots()
+            elif r < 0.63:
+                if not self.mirror: continue
+                h, _ = self.mirror.popitem()
+                print "popped", h
+            elif r < 0.75:
+                obj = stm_allocate(32)
+                self.values.append(obj)
+                stm_set_char(obj, chr(len(self.values) & 255))
+            else:
+                if not self.mirror or not self.values: continue
+                h = random.choice(self.mirror.keys())
+                key = random.randrange(0, 32)
+                value = random.choice(self.values)
+                print "htset(%r/%r, %r, %r)" % (h, get_hashtable(h), key, value)
+                self.push_roots()
+                tl = self.tls[self.current_thread]
+                htset(h, key, value, tl)
+                self.pop_roots()
+                self.mirror[h][key] = value
+        #
+        print 'closing down...'
+        self.become_inevitable()
+        self.commit_transaction()
+        self.exchange_threads()
+        self.pop_roots()
+        self.become_inevitable()
+        stm_major_collect()       # to get rid of the hashtable objects

