[pypy-commit] pypy stmgc-c8: Merge with stmgc-c8-hashtable

Raemi noreply at buildbot.pypy.org
Thu Apr 9 17:12:28 CEST 2015


Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: stmgc-c8
Changeset: r76767:0524ad7cc770
Date: 2015-04-09 17:10 +0200
http://bitbucket.org/pypy/pypy/changeset/0524ad7cc770/

Log:	Merge with stmgc-c8-hashtable

diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -259,14 +259,13 @@
                                       'freelist': _ll_hashtable_freelist,
                                         'lookup': _ll_hashtable_lookup,
                                       'writeobj': _ll_hashtable_writeobj})
-# NULL_HASHTABLE = lltype.nullptr(_HASHTABLE_OBJ)
-NULL_HASHTABLE = None
+NULL_HASHTABLE = lltype.nullptr(_HASHTABLE_OBJ)
 
 def _ll_hashtable_trace(gc, obj, callback, arg):
     from rpython.memory.gctransform.stmframework import get_visit_function
     visit_fn = get_visit_function(callback, arg)
     addr = obj + llmemory.offsetof(_HASHTABLE_OBJ, 'll_raw_hashtable')
-    llop.stm_hashtable_tracefn(lltype.Void, addr.address[0], visit_fn)
+    llop.stm_hashtable_tracefn(lltype.Void, obj, addr.address[0], visit_fn)
 lambda_hashtable_trace = lambda: _ll_hashtable_trace
 
 def _ll_hashtable_finalizer(h):
@@ -280,66 +279,22 @@
 def create_hashtable():
     if not we_are_translated():
         return HashtableForTest()      # for tests
-    return HashtableEmulation()
-    # rgc.register_custom_light_finalizer(_HASHTABLE_OBJ, lambda_hashtable_finlz)
-    # rgc.register_custom_trace_hook(_HASHTABLE_OBJ, lambda_hashtable_trace)
-    # # Pass a null pointer to _STM_HASHTABLE_ENTRY to stm_hashtable_create().
-    # # Make sure we see a malloc() of it, so that its typeid is correctly
-    # # initialized.  It can be done in a NonConstant(False) path so that
-    # # the C compiler will actually drop it.
-    # if _false:
-    #     p = lltype.malloc(_STM_HASHTABLE_ENTRY)
-    # else:
-    #     p = lltype.nullptr(_STM_HASHTABLE_ENTRY)
-    # h = lltype.malloc(_HASHTABLE_OBJ, zero=True)
-    # h.ll_raw_hashtable = llop.stm_hashtable_create(_STM_HASHTABLE_P, p)
-    # return h
+    rgc.register_custom_light_finalizer(_HASHTABLE_OBJ, lambda_hashtable_finlz)
+    rgc.register_custom_trace_hook(_HASHTABLE_OBJ, lambda_hashtable_trace)
+    # Pass a null pointer to _STM_HASHTABLE_ENTRY to stm_hashtable_create().
+    # Make sure we see a malloc() of it, so that its typeid is correctly
+    # initialized.  It can be done in a NonConstant(False) path so that
+    # the C compiler will actually drop it.
+    if _false:
+        p = lltype.malloc(_STM_HASHTABLE_ENTRY)
+    else:
+        p = lltype.nullptr(_STM_HASHTABLE_ENTRY)
+    h = lltype.malloc(_HASHTABLE_OBJ, zero=True)
+    h.ll_raw_hashtable = llop.stm_hashtable_create(_STM_HASHTABLE_P, p)
+    return h
 
 NULL_GCREF = lltype.nullptr(llmemory.GCREF.TO)
 
-class HashtableEmulation(object):
-    def __init__(self):
-        self._content = {}      # dict {integer: GCREF}
-
-    def get(self, key):
-        return self._content.get(key, NULL_GCREF)
-
-    def set(self, key, value):
-        if value:
-            self._content[key] = value
-        else:
-            try:
-                del self._content[key]
-            except KeyError:
-                pass
-
-    def len(self):
-        return len(self._content)
-
-    def list(self):
-        items = []
-        for key in self._content.keys():
-            items.append(self.lookup(key))
-        count = len(items)
-        return items, count
-
-    def freelist(self, array):
-        pass
-
-    def lookup(self, key):
-        return EntryObjectEmulation(self, key)
-
-    def writeobj(self, entry, nvalue):
-        self.set(entry.key, nvalue)
-
-class EntryObjectEmulation(object):
-    def __init__(self, hashtable, key):
-        self.hashtable = hashtable
-        self.key = key
-        self.index = r_uint(key)
-        self.object = hashtable.get(key)
-
-
 class HashtableForTest(object):
     def __init__(self):
         self._content = {}      # dict {integer: GCREF}
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -343,8 +343,9 @@
     arg1 = funcgen.expr(op.args[1])
     arg2 = funcgen.expr(op.args[2])
     result = funcgen.expr(op.result)
-    return '%s = stm_hashtable_lookup((object_t *)%s, %s, %s);' % (
-        result, arg0, arg1, arg2)
+    typename = cdecl(funcgen.lltypename(op.result), '')
+    return '%s = (%s)stm_hashtable_lookup((object_t *)%s, %s, %s);' % (
+        result, typename, arg0, arg1, arg2)
 
 def stm_hashtable_length_upper_bound(funcgen, op):
     arg0 = funcgen.expr(op.args[0])
@@ -357,10 +358,12 @@
     arg1 = funcgen.expr(op.args[1])
     arg2 = funcgen.expr(op.args[2])
     result = funcgen.expr(op.result)
-    return '%s = stm_hashtable_list((object_t *)%s, %s, %s);' % (
-        result, arg0, arg1, arg2)
+    return ('%s = stm_hashtable_list((object_t *)%s, %s, '
+            '(stm_hashtable_entry_t **)%s);' % (result, arg0, arg1, arg2))
 
 def stm_hashtable_tracefn(funcgen, op):
     arg0 = funcgen.expr(op.args[0])
     arg1 = funcgen.expr(op.args[1])
-    return 'stm_hashtable_tracefn((stm_hashtable_t *)%s, %s);' % (arg0, arg1)
+    arg2 = funcgen.expr(op.args[2])
+    return ('stm_hashtable_tracefn(%s, (stm_hashtable_t *)%s, '
+            ' (void(*)(object_t**))%s);' % (arg0, arg1, arg2))
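
(Illustration only, not part of the commit: these helpers emit single lines
of C source.  Assuming funcgen.expr() yields C names such as pypy_g_hobj,
pypy_g_ht, pypy_g_key, pypy_g_visit and pypy_g_entry, the updated helpers
would now generate roughly

    pypy_g_entry = (stm_hashtable_entry_t *)stm_hashtable_lookup(
                       (object_t *)pypy_g_hobj, pypy_g_ht, pypy_g_key);
    stm_hashtable_tracefn(pypy_g_hobj, (stm_hashtable_t *)pypy_g_ht,
                          (void(*)(object_t**))pypy_g_visit);

matching the new three-argument C signature of stm_hashtable_tracefn
introduced in hashtable.c below.)
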
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -280,8 +280,14 @@
         struct stm_undo_s *end = undo + cl->written_count;
         for (; undo < end; undo++) {
             if (undo->type == TYPE_POSITION_MARKER) {
-                fprintf(stderr, "    marker %p %lu\n",
-                        undo->marker_object, undo->marker_odd_number);
+                if (undo->type2 == TYPE_MODIFIED_HASHTABLE) {
+                    fprintf(stderr, "    hashtable %p\n",
+                            undo->modif_hashtable);
+                }
+                else {
+                    fprintf(stderr, "    marker %p %lu\n",
+                            undo->marker_object, undo->marker_odd_number);
+                }
                 continue;
             }
             fprintf(stderr, "    obj %p, size %d, ofs %lu: ", undo->object,
@@ -383,21 +389,40 @@
                     struct stm_undo_s *undo = cl->written;
                     struct stm_undo_s *end = cl->written + cl->written_count;
                     for (; undo < end; undo++) {
-                        if (undo->type == TYPE_POSITION_MARKER)
+                        object_t *obj;
+
+                        if (undo->type != TYPE_POSITION_MARKER) {
+                            /* common case: 'undo->object' was written to
+                               in this past commit, so we must check that
+                               it was not read by us. */
+                            obj = undo->object;
+                        }
+                        else if (undo->type2 != TYPE_MODIFIED_HASHTABLE)
                             continue;
-                        if (_stm_was_read(undo->object)) {
-                            /* first reset all modified objects from the backup
-                               copies as soon as the first conflict is detected;
-                               then we will proceed below to update our segment from
-                               the old (but unmodified) version to the newer version.
-                            */
-                            reset_modified_from_backup_copies(my_segnum);
-                            timing_write_read_contention(cl->written, undo);
-                            needs_abort = true;
+                        else {
+                            /* the previous stm_undo_s is about a written
+                               'entry' object, which belongs to the hashtable
+                               given now.  Check that we haven't read the
+                               hashtable (via stm_hashtable_list()). */
+                            obj = undo->modif_hashtable;
+                        }
 
-                            dprintf(("_stm_validate() failed for obj %p\n", undo->object));
-                            break;
-                        }
+                        if (LIKELY(!_stm_was_read(obj)))
+                            continue;
+
+                        /* conflict! */
+                        dprintf(("_stm_validate() failed for obj %p\n", obj));
+
+                        /* first reset all modified objects from the backup
+                           copies as soon as the first conflict is detected;
+                           then we will proceed below to update our segment
+                           from the old (but unmodified) version to the newer
+                           version.
+                        */
+                        reset_modified_from_backup_copies(my_segnum);
+                        timing_write_read_contention(cl->written, undo);
+                        needs_abort = true;
+                        break;
                     }
                 }
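
(Illustrative note, not part of the commit: 'modified_old_objects' holds
three-word stm_undo_s records, and stm_hashtable_write_entry() in
hashtable.c below appends the hashtable record immediately after the
ordinary write record for the entry, so the list ends with

    { entry,                 <backup/slice words>          }  <- from stm_write(entry)
    { TYPE_POSITION_MARKER,  TYPE_MODIFIED_HASHTABLE, hobj }  <- appended right after

The loop above checks the first record against reads of 'entry' itself, and
the second against the read marker of 'hobj', which stm_hashtable_list()
sets for the hashtable as a whole.)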
 
diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h
--- a/rpython/translator/stm/src_stm/stm/core.h
+++ b/rpython/translator/stm/src_stm/stm/core.h
@@ -190,9 +190,15 @@
         uintptr_t marker_odd_number; /* the odd number part of the marker */
         object_t *marker_object;     /* the object part of the marker */
     };
+    struct {
+        intptr_t type1;             /* TYPE_POSITION_MARKER (again) */
+        intptr_t type2;             /* TYPE_MODIFIED_HASHTABLE */
+        object_t *modif_hashtable;  /* modified entry is previous stm_undo_s */
+    };
   };
 };
 #define TYPE_POSITION_MARKER    (-1)
+#define TYPE_MODIFIED_HASHTABLE (-2)
 #define SLICE_OFFSET(slice)  ((slice) >> 16)
 #define SLICE_SIZE(slice)    ((int)((slice) & 0xFFFF))
 #define NEW_SLICE(offset, size) (((uint64_t)(offset)) << 16 | (size))
@@ -251,6 +257,14 @@
     return stm_object_pages + segment_num * (NB_PAGES * 4096UL);
 }
 
+static inline long get_num_segment_containing_address(char *addr)
+{
+    uintptr_t delta = addr - stm_object_pages;
+    uintptr_t result = delta / (NB_PAGES * 4096UL);
+    assert(result < NB_SEGMENTS);
+    return result;
+}
+
 static inline
 struct stm_segment_info_s *get_segment(long segment_num) {
     return (struct stm_segment_info_s *)REAL_ADDRESS(
@@ -285,6 +299,17 @@
 static void _signal_handler(int sig, siginfo_t *siginfo, void *context);
 static bool _stm_validate();
 
+static inline bool was_read_remote(char *base, object_t *obj)
+{
+    uint8_t other_transaction_read_version =
+        ((struct stm_segment_info_s *)REAL_ADDRESS(base, STM_PSEGMENT))
+            ->transaction_read_version;
+    uint8_t rm = ((struct stm_read_marker_s *)
+                  (base + (((uintptr_t)obj) >> 4)))->rm;
+    assert(rm <= other_transaction_read_version);
+    return rm == other_transaction_read_version;
+}
+
 static inline void _duck(void) {
     /* put a call to _duck() between two instructions that set 0 into
        a %gs-prefixed address and that may otherwise be replaced with
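
(Worked example, not part of the commit: the expression in was_read_remote()
above maps every 16 bytes of segment address space onto one read-marker
byte.  For an object at segment offset 0x5f3010 in a remote segment with
base 'base':

    /* the marker byte lives at base + (0x5f3010 >> 4) == base + 0x5f301 */
    uint8_t rm = *(uint8_t *)(base + (0x5f3010 >> 4));

Bumping transaction_read_version at the start of each transaction
invalidates all markers at once; stale marker bytes compare strictly
smaller, which is what the assert checks.)
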
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -127,6 +127,58 @@
     return o;
 }
 
+static void _fill_preexisting_slice(long segnum, char *dest,
+                                    const char *src, uintptr_t size)
+{
+    uintptr_t np = dest - get_segment_base(segnum);
+    if (get_page_status_in(segnum, np / 4096) != PAGE_NO_ACCESS)
+        memcpy(dest, src, size);
+}
+
+object_t *stm_allocate_preexisting(ssize_t size_rounded_up,
+                                   const char *initial_data)
+{
+    stm_char *np = allocate_outside_nursery_large(size_rounded_up);
+    uintptr_t nobj = (uintptr_t)np;
+    dprintf(("allocate_preexisting: %p\n", (object_t *)nobj));
+
+    char *nobj_seg0 = stm_object_pages + nobj;
+    memcpy(nobj_seg0, initial_data, size_rounded_up);
+    ((struct object_s *)nobj_seg0)->stm_flags = GCFLAG_WRITE_BARRIER;
+
+    acquire_privatization_lock(STM_SEGMENT->segment_num);
+    DEBUG_EXPECT_SEGFAULT(false);
+
+    long j;
+    for (j = 1; j < NB_SEGMENTS; j++) {
+        const char *src = nobj_seg0;
+        char *dest = get_segment_base(j) + nobj;
+        char *end = dest + size_rounded_up;
+
+        while (((uintptr_t)dest) / 4096 != ((uintptr_t)end - 1) / 4096) {
+            uintptr_t count = 4096 - (((uintptr_t)dest) & 4095);
+            _fill_preexisting_slice(j, dest, src, count);
+            src += count;
+            dest += count;
+        }
+        _fill_preexisting_slice(j, dest, src, end - dest);
+
+#ifdef STM_TESTS
+        /* can't really enable this check outside tests, because there is
+           a chance that the transaction_state changes in parallel */
+        if (get_priv_segment(j)->transaction_state != TS_NONE) {
+            assert(!was_read_remote(get_segment_base(j), (object_t *)nobj));
+        }
+#endif
+    }
+
+    DEBUG_EXPECT_SEGFAULT(true);
+    release_privatization_lock(STM_SEGMENT->segment_num);
+
+    write_fence();     /* make sure 'nobj' is fully initialized from
+                          all threads here */
+    return (object_t *)nobj;
+}
 
 /************************************************************/
 
@@ -246,6 +298,8 @@
 }
 
 
+#define TRACE_FOR_MAJOR_COLLECTION  (&mark_record_trace)
+
 static void mark_and_trace(
     object_t *obj,
     char *segment_base, /* to trace obj in */
@@ -405,7 +459,8 @@
         struct stm_undo_s *modified = (struct stm_undo_s *)lst->items;
         struct stm_undo_s *end = (struct stm_undo_s *)(lst->items + lst->count);
         for (; modified < end; modified++) {
-            if (modified->type == TYPE_POSITION_MARKER)
+            if (modified->type == TYPE_POSITION_MARKER &&
+                    modified->type2 != TYPE_MODIFIED_HASHTABLE)
                 mark_visit_possibly_new_object(modified->marker_object, pseg);
         }
     }
@@ -538,6 +593,31 @@
                 list_set_item(lst, n, list_pop_item(lst));
             }
         }
+
+        /* Remove from 'modified_old_objects' all old hashtables that die */
+        {
+            lst = pseg->modified_old_objects;
+            uintptr_t j, k = 0, limit = list_count(lst);
+            for (j = 0; j < limit; j += 3) {
+                uintptr_t e0 = list_item(lst, j + 0);
+                uintptr_t e1 = list_item(lst, j + 1);
+                uintptr_t e2 = list_item(lst, j + 2);
+                if (e0 == TYPE_POSITION_MARKER &&
+                    e1 == TYPE_MODIFIED_HASHTABLE &&
+                    !mark_visited_test((object_t *)e2)) {
+                    /* hashtable object dies */
+                }
+                else {
+                    if (j != k) {
+                        list_set_item(lst, k + 0, e0);
+                        list_set_item(lst, k + 1, e1);
+                        list_set_item(lst, k + 2, e2);
+                    }
+                    k += 3;
+                }
+            }
+            lst->count = k;
+        }
     }
 #pragma pop_macro("STM_SEGMENT")
 #pragma pop_macro("STM_PSEGMENT")
diff --git a/rpython/translator/stm/src_stm/stm/hashtable.c b/rpython/translator/stm/src_stm/stm/hashtable.c
new file mode 100644
--- /dev/null
+++ b/rpython/translator/stm/src_stm/stm/hashtable.c
@@ -0,0 +1,532 @@
+/* Imported by rpython/translator/stm/import_stmgc.py */
+/*
+Design of stmgc's "hashtable" objects
+=====================================
+A "hashtable" is theoretically a lazily-filled array of objects of
+length 2**64.  Initially it is full of NULLs.  In practice it is
+implemented as a dictionary in which the NULL entries are not stored.
+
+A real dictionary can be implemented on top of it, by using the index
+`hash(key)` in the hashtable, and storing a list of `(key, value)`
+pairs at that index (usually only one, unless there is a hash
+collision).
+
+The main operations on a hashtable are reading or writing an object at a
+given index.  It also supports fetching the list of non-NULL entries.
+
+There are two markers for every index (a read and a write marker).
+This is unlike regular arrays, which have only two markers in total.
+
+Additionally, we use the read marker for the hashtable object itself
+to mean "we have read the complete list of keys".  This plays the role
+of a "global" read marker: when any thread adds a new key/value object
+to the hashtable, this new object's read marker is initialized with a
+copy of the "global" read marker --- in all segments.
+
+
+Implementation
+--------------
+
+First idea: have the hashtable in raw memory, pointing to "entry"
+objects (which are regular, GC- and STM-managed objects).  The entry
+objects themselves point to the user-specified objects.  The entry
+objects hold the read/write markers.  Every entry object, once
+created, stays around.  It is only removed by the next major GC if it
+points to NULL and its read/write markers are not set in any
+currently-running transaction.
+
+References
+----------
+
+Inspired by: http://ppl.stanford.edu/papers/podc011-bronson.pdf
+*/
+
+
+uint32_t stm_hashtable_entry_userdata;
+
+
+#define INITIAL_HASHTABLE_SIZE   8
+#define PERTURB_SHIFT            5
+#define RESIZING_LOCK            0
+
+typedef struct {
+    uintptr_t mask;
+
+    /* 'resize_counter' starts at an odd value, and is decremented (by
+       6) for every new item put in 'items'.  When it crosses 0, we
+       instead allocate a bigger table and change 'resize_counter' to
+       be a regular pointer to it (which is then even).  The whole
+       structure is immutable then.
+
+       The field 'resize_counter' also works as a write lock: changes
+       go via the intermediate value RESIZING_LOCK (0).
+    */
+    uintptr_t resize_counter;
+
+    stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
+} stm_hashtable_table_t;
+
+#define IS_EVEN(p) (((p) & 1) == 0)
+
+struct stm_hashtable_s {
+    stm_hashtable_table_t *table;
+    stm_hashtable_table_t initial_table;
+    uint64_t additions;
+};
+
+
+static inline void init_table(stm_hashtable_table_t *table, uintptr_t itemcount)
+{
+    table->mask = itemcount - 1;
+    table->resize_counter = itemcount * 4 + 1;
+    memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *));
+}
+
+stm_hashtable_t *stm_hashtable_create(void)
+{
+    stm_hashtable_t *hashtable = malloc(sizeof(stm_hashtable_t));
+    assert(hashtable);
+    hashtable->table = &hashtable->initial_table;
+    hashtable->additions = 0;
+    init_table(&hashtable->initial_table, INITIAL_HASHTABLE_SIZE);
+    return hashtable;
+}
+
+void stm_hashtable_free(stm_hashtable_t *hashtable)
+{
+    uintptr_t rc = hashtable->initial_table.resize_counter;
+    free(hashtable);
+    while (IS_EVEN(rc)) {
+        assert(rc != RESIZING_LOCK);
+
+        stm_hashtable_table_t *table = (stm_hashtable_table_t *)rc;
+        rc = table->resize_counter;
+        free(table);
+    }
+}
+
+static bool _stm_was_read_by_anybody(object_t *obj)
+{
+    /* can only be safely called during major GC, when all other threads
+       are suspended */
+    long i;
+    for (i = 1; i < NB_SEGMENTS; i++) {
+        if (get_priv_segment(i)->transaction_state == TS_NONE)
+            continue;
+        if (was_read_remote(get_segment_base(i), obj))
+            return true;
+    }
+    return false;
+}
+
+#define VOLATILE_HASHTABLE(p)    ((volatile stm_hashtable_t *)(p))
+#define VOLATILE_TABLE(p)  ((volatile stm_hashtable_table_t *)(p))
+
+static void _insert_clean(stm_hashtable_table_t *table,
+                          stm_hashtable_entry_t *entry,
+                          uintptr_t index)
+{
+    uintptr_t mask = table->mask;
+    uintptr_t i = index & mask;
+    if (table->items[i] == NULL) {
+        table->items[i] = entry;
+        return;
+    }
+
+    uintptr_t perturb = index;
+    while (1) {
+        i = (i << 2) + i + perturb + 1;
+        i &= mask;
+        if (table->items[i] == NULL) {
+            table->items[i] = entry;
+            return;
+        }
+
+        perturb >>= PERTURB_SHIFT;
+    }
+}
+
+static void _stm_rehash_hashtable(stm_hashtable_t *hashtable,
+                                  uintptr_t biggercount,
+                                  char *segment_base)
+{
+    dprintf(("rehash %p to size %ld, segment_base=%p\n",
+             hashtable, biggercount, segment_base));
+
+    size_t size = (offsetof(stm_hashtable_table_t, items)
+                   + biggercount * sizeof(stm_hashtable_entry_t *));
+    stm_hashtable_table_t *biggertable = malloc(size);
+    assert(biggertable);   // XXX
+
+    stm_hashtable_table_t *table = hashtable->table;
+    table->resize_counter = (uintptr_t)biggertable;
+    /* ^^^ this unlocks the table by writing a non-zero value to
+       table->resize_counter, but the new value is a pointer to the
+       new bigger table, so IS_EVEN() is still true */
+    assert(IS_EVEN(table->resize_counter));
+
+    init_table(biggertable, biggercount);
+
+    uintptr_t j, mask = table->mask;
+    uintptr_t rc = biggertable->resize_counter;
+    for (j = 0; j <= mask; j++) {
+        stm_hashtable_entry_t *entry = table->items[j];
+        if (entry == NULL)
+            continue;
+        if (segment_base != NULL) {
+            if (((struct stm_hashtable_entry_s *)
+                       REAL_ADDRESS(segment_base, entry))->object == NULL &&
+                   !_stm_was_read_by_anybody((object_t *)entry)) {
+                dprintf(("  removing dead %p\n", entry));
+                continue;
+            }
+        }
+
+        uintptr_t eindex;
+        if (segment_base == NULL)
+            eindex = entry->index;   /* read from STM_SEGMENT */
+        else
+            eindex = ((struct stm_hashtable_entry_s *)
+                       REAL_ADDRESS(segment_base, entry))->index;
+
+        dprintf(("  insert_clean %p at index=%ld\n",
+                 entry, eindex));
+        _insert_clean(biggertable, entry, eindex);
+        assert(rc > 6);
+        rc -= 6;
+    }
+    biggertable->resize_counter = rc;
+
+    write_fence();   /* make sure that 'biggertable' is valid here,
+                        and make sure 'table->resize_counter' is updated
+                        ('table' must be immutable from now on). */
+    VOLATILE_HASHTABLE(hashtable)->table = biggertable;
+}
+
+stm_hashtable_entry_t *stm_hashtable_lookup(object_t *hashtableobj,
+                                            stm_hashtable_t *hashtable,
+                                            uintptr_t index)
+{
+    stm_hashtable_table_t *table;
+    uintptr_t mask;
+    uintptr_t i;
+    stm_hashtable_entry_t *entry;
+
+ restart:
+    /* classical dict lookup logic */
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    mask = table->mask;      /* read-only field */
+    i = index & mask;
+    entry = VOLATILE_TABLE(table)->items[i];
+    if (entry != NULL) {
+        if (entry->index == index)
+            return entry;           /* found at the first try */
+
+        uintptr_t perturb = index;
+        while (1) {
+            i = (i << 2) + i + perturb + 1;
+            i &= mask;
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                if (entry->index == index)
+                    return entry;    /* found */
+            }
+            else
+                break;
+            perturb >>= PERTURB_SHIFT;
+        }
+    }
+    /* here, we didn't find the 'entry' with the correct index.  Note
+       that even if the same 'table' is modified or resized by other
+       threads concurrently, any new item found from a race condition
+       would anyway contain NULL in the present segment (ensured by
+       the first write_fence() below).  If the 'table' grows an entry
+       just after we checked above, then we go ahead and lock the
+       table; but after we get the lock, we will notice the new entry
+       (ensured by the second write_fence() below) and restart the
+       whole process.
+     */
+
+    uintptr_t rc = VOLATILE_TABLE(table)->resize_counter;
+
+    /* if rc is RESIZING_LOCK (which is 0, so even), a concurrent thread
+       is writing to the hashtable.  Or, if rc is another even number, it is
+       actually a pointer to the next version of the table, installed
+       just now.  In both cases, this thread must simply spin loop.
+    */
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+    /* in the other cases, we need to grab the RESIZING_LOCK.
+     */
+    if (!__sync_bool_compare_and_swap(&table->resize_counter,
+                                      rc, RESIZING_LOCK)) {
+        goto restart;
+    }
+    /* we now have the lock.  The only table with a non-even value of
+       'resize_counter' should be the last one in the chain, so if we
+       succeeded in locking it, check this. */
+    assert(table == hashtable->table);
+
+    /* Check that 'table->items[i]' is still NULL,
+       i.e. hasn't been populated under our feet.
+    */
+    if (table->items[i] != NULL) {
+        table->resize_counter = rc;    /* unlock */
+        goto restart;
+    }
+    /* if rc is greater than 6, there is enough room for a new
+       item in the current table.
+    */
+    if (rc > 6) {
+        /* we can only enter here once!  If we allocate stuff, we may
+           run the GC, and so 'hashtableobj' might move afterwards. */
+        if (_is_in_nursery(hashtableobj)) {
+            entry = (stm_hashtable_entry_t *)
+                stm_allocate(sizeof(stm_hashtable_entry_t));
+            entry->userdata = stm_hashtable_entry_userdata;
+            entry->index = index;
+            entry->object = NULL;
+        }
+        else {
+            /* for a non-nursery 'hashtableobj', we pretend that the
+               'entry' object we're about to return was already
+               existing all along, with NULL in all segments.  If the
+               caller of this function is going to modify the 'object'
+               field, it will call stm_write(entry) first, which will
+               correctly schedule 'entry' for write propagation.  We
+               do that even if 'hashtableobj' was created by the
+               running transaction: the new 'entry' object is created
+               as if it was older than the transaction.
+
+               Note the following difference: if 'hashtableobj' is
+               still in the nursery (case above), the 'entry' object
+               is also allocated from the nursery, and after a minor
+               collection it ages as an old-but-created-by-the-
+               current-transaction object.  We could try to emulate
+               this here, or to create young 'entry' objects, but
+               doing either of these would require careful
+               synchronization with other pieces of the code that may
+               change.
+            */
+            struct stm_hashtable_entry_s initial = {
+                .userdata = stm_hashtable_entry_userdata,
+                .index = index,
+                .object = NULL
+            };
+            entry = (stm_hashtable_entry_t *)
+                stm_allocate_preexisting(sizeof(stm_hashtable_entry_t),
+                                         (char *)&initial.header);
+            hashtable->additions++;
+        }
+        table->items[i] = entry;
+        write_fence();     /* make sure 'table->items' is written here */
+        VOLATILE_TABLE(table)->resize_counter = rc - 6;    /* unlock */
+        return entry;
+    }
+    else {
+        /* if rc is smaller than 6, we must allocate a new bigger table.
+         */
+        uintptr_t biggercount = table->mask + 1;
+        if (biggercount < 50000)
+            biggercount *= 4;
+        else
+            biggercount *= 2;
+        _stm_rehash_hashtable(hashtable, biggercount, /*segment_base=*/NULL);
+        goto restart;
+    }
+}
+
+object_t *stm_hashtable_read(object_t *hobj, stm_hashtable_t *hashtable,
+                             uintptr_t key)
+{
+    stm_hashtable_entry_t *e = stm_hashtable_lookup(hobj, hashtable, key);
+    stm_read((object_t *)e);
+    return e->object;
+}
+
+void stm_hashtable_write_entry(object_t *hobj, stm_hashtable_entry_t *entry,
+                               object_t *nvalue)
+{
+    if (_STM_WRITE_CHECK_SLOWPATH((object_t *)entry)) {
+
+        stm_write((object_t *)entry);
+
+        uintptr_t i = list_count(STM_PSEGMENT->modified_old_objects);
+        if (i > 0 && list_item(STM_PSEGMENT->modified_old_objects, i - 3)
+                     == (uintptr_t)entry) {
+            /* The stm_write() above recorded a write to 'entry'.  Here,
+               we add another stm_undo_s to modified_old_objects with
+               TYPE_MODIFIED_HASHTABLE.  It is ignored everywhere except
+               in _stm_validate().
+
+               The goal is that this TYPE_MODIFIED_HASHTABLE ends up in
+               the commit log's 'cl_written' array.  Later, another
+               transaction validating that log will check two things:
+
+               - the regular stm_undo_s entry put by stm_write() above
+                 will make the other transaction check that it didn't
+                 read the same 'entry' object;
+
+                 - the TYPE_MODIFIED_HASHTABLE entry we're adding now
+                   will make the other transaction check that it didn't
+                   do any stm_hashtable_list() on the complete hashtable.
+            */
+            STM_PSEGMENT->modified_old_objects = list_append3(
+                STM_PSEGMENT->modified_old_objects,
+                TYPE_POSITION_MARKER,      /* type1 */
+                TYPE_MODIFIED_HASHTABLE,   /* type2 */
+                (uintptr_t)hobj);          /* modif_hashtable */
+        }
+    }
+    entry->object = nvalue;
+}
+
+void stm_hashtable_write(object_t *hobj, stm_hashtable_t *hashtable,
+                         uintptr_t key, object_t *nvalue,
+                         stm_thread_local_t *tl)
+{
+    STM_PUSH_ROOT(*tl, nvalue);
+    STM_PUSH_ROOT(*tl, hobj);
+    stm_hashtable_entry_t *e = stm_hashtable_lookup(hobj, hashtable, key);
+    STM_POP_ROOT(*tl, hobj);
+    STM_POP_ROOT(*tl, nvalue);
+    stm_hashtable_write_entry(hobj, e, nvalue);
+}
+
+long stm_hashtable_length_upper_bound(stm_hashtable_t *hashtable)
+{
+    stm_hashtable_table_t *table;
+    uintptr_t rc;
+
+ restart:
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    rc = VOLATILE_TABLE(table)->resize_counter;
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+
+    uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+    uintptr_t num_entries_times_6 = initial_rc - rc;
+    return num_entries_times_6 / 6;
+}
+
+long stm_hashtable_list(object_t *hobj, stm_hashtable_t *hashtable,
+                        stm_hashtable_entry_t **results)
+{
+    /* Set the read marker.  It will be left as long as we're running
+       the same transaction.
+    */
+    stm_read(hobj);
+
+    /* Get the table.  No synchronization is needed: we may miss some
+       entries that are being added, but they would contain NULL in
+       this segment anyway. */
+    stm_hashtable_table_t *table = VOLATILE_HASHTABLE(hashtable)->table;
+
+    /* Read all entries, check which ones are not NULL, count them,
+       and optionally list them in 'results'.
+    */
+    uintptr_t i, mask = table->mask;
+    stm_hashtable_entry_t *entry;
+    long nresult = 0;
+
+    if (results != NULL) {
+        /* collect the results in the provided list */
+        for (i = 0; i <= mask; i++) {
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                stm_read((object_t *)entry);
+                if (entry->object != NULL)
+                    results[nresult++] = entry;
+            }
+        }
+    }
+    else {
+        /* don't collect, just get the exact number of results */
+        for (i = 0; i <= mask; i++) {
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                stm_read((object_t *)entry);
+                if (entry->object != NULL)
+                    nresult++;
+            }
+        }
+    }
+    return nresult;
+}
+
+static void _stm_compact_hashtable(struct object_s *hobj,
+                                   stm_hashtable_t *hashtable)
+{
+    stm_hashtable_table_t *table = hashtable->table;
+    uintptr_t rc = table->resize_counter;
+    assert(!IS_EVEN(rc));
+
+    if (hashtable->additions * 4 > table->mask) {
+        hashtable->additions = 0;
+
+        /* If 'hobj' was created in some current transaction, i.e. if it is
+           now an overflow object, then we have the risk that some of its
+           entry objects were not created with stm_allocate_preexisting().
+           In that situation, a valid workaround is to read all entry
+           objects in the segment of the running transaction.  Otherwise,
+           the base case is to read them all from segment zero.
+        */
+        long segnum = get_num_segment_containing_address((char *)hobj);
+        if (!IS_OVERFLOW_OBJ(get_priv_segment(segnum), hobj))
+            segnum = 0;
+
+        uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+        uintptr_t num_entries_times_6 = initial_rc - rc;
+        uintptr_t count = INITIAL_HASHTABLE_SIZE;
+        while (count * 4 < num_entries_times_6)
+            count *= 2;
+        /* sanity-check: 'num_entries_times_6 < initial_rc', and so 'count'
+           can never grow larger than the current table size. */
+        assert(count <= table->mask + 1);
+
+        dprintf(("compact with %ld items:\n", num_entries_times_6 / 6));
+        _stm_rehash_hashtable(hashtable, count, get_segment_base(segnum));
+    }
+
+    table = hashtable->table;
+    assert(!IS_EVEN(table->resize_counter));
+
+    if (table != &hashtable->initial_table) {
+        uintptr_t rc = hashtable->initial_table.resize_counter;
+        while (1) {
+            assert(IS_EVEN(rc));
+            assert(rc != RESIZING_LOCK);
+
+            stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc;
+            if (old_table == table)
+                break;
+            rc = old_table->resize_counter;
+            free(old_table);
+        }
+        hashtable->initial_table.resize_counter = (uintptr_t)table;
+        assert(IS_EVEN(hashtable->initial_table.resize_counter));
+    }
+}
+
+void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
+                           void trace(object_t **))
+{
+    if (trace == TRACE_FOR_MAJOR_COLLECTION)
+        _stm_compact_hashtable(hobj, hashtable);
+
+    stm_hashtable_table_t *table;
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+
+    uintptr_t j, mask = table->mask;
+    for (j = 0; j <= mask; j++) {
+        stm_hashtable_entry_t *volatile *pentry;
+        pentry = &VOLATILE_TABLE(table)->items[j];
+        if (*pentry != NULL) {
+            trace((object_t **)pentry);
+        }
+    }
+}
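
(A minimal standalone sketch, not part of the commit, of the
'resize_counter' encoding that hashtable.c above relies on:

    #include <stdint.h>
    #include <assert.h>

    #define RESIZING_LOCK 0
    #define IS_EVEN(p) (((p) & 1) == 0)

    typedef struct { uintptr_t resize_counter; } table_t;

    /* odd: a live table; each insertion subtracts 6, so there is room
       for about resize_counter/6 more items before a resize */
    static int table_is_live(const table_t *t)
    {
        return !IS_EVEN(t->resize_counter);
    }

    /* 0 (even): a concurrent thread holds the insertion lock */
    static int table_is_locked(const table_t *t)
    {
        return t->resize_counter == RESIZING_LOCK;
    }

    /* any other even value: malloc() results are aligned, so the value
       doubles as a pointer to the table that superseded this one */
    static table_t *table_successor(const table_t *t)
    {
        assert(IS_EVEN(t->resize_counter) && !table_is_locked(t));
        return (table_t *)t->resize_counter;
    }

This single-word encoding is why stm_hashtable_free() and
_stm_compact_hashtable() can walk the chain of superseded tables simply by
following resize_counter until they reach an odd value.)
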
diff --git a/rpython/translator/stm/src_stm/stm/marker.c b/rpython/translator/stm/src_stm/stm/marker.c
--- a/rpython/translator/stm/src_stm/stm/marker.c
+++ b/rpython/translator/stm/src_stm/stm/marker.c
@@ -42,7 +42,8 @@
     */
     while (contention != start) {
         --contention;
-        if (contention->type == TYPE_POSITION_MARKER) {
+        if (contention->type == TYPE_POSITION_MARKER &&
+            contention->type2 != TYPE_MODIFIED_HASHTABLE) {
             out_marker->odd_number = contention->marker_odd_number;
             out_marker->object = contention->marker_object;
             return;
@@ -69,6 +70,9 @@
             return;    /* already up-to-date */
     }
 
+    /* -2 is not odd */
+    assert(marker.odd_number != (uintptr_t)TYPE_MODIFIED_HASHTABLE);
+
     STM_PSEGMENT->position_markers_last = list_count(list);
     STM_PSEGMENT->modified_old_objects = list_append3(
         list,
diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c
--- a/rpython/translator/stm/src_stm/stm/misc.c
+++ b/rpython/translator/stm/src_stm/stm/misc.c
@@ -31,10 +31,7 @@
 
 bool _stm_was_read(object_t *obj)
 {
-    uint8_t rm = ((struct stm_read_marker_s *)
-                  (STM_SEGMENT->segment_base + (((uintptr_t)obj) >> 4)))->rm;
-    assert(rm <= STM_SEGMENT->transaction_read_version);
-    return rm == STM_SEGMENT->transaction_read_version;
+    return was_read_remote(STM_SEGMENT->segment_base, obj);
 }
 
 bool _stm_was_written(object_t *obj)
diff --git a/rpython/translator/stm/src_stm/stm/nursery.c b/rpython/translator/stm/src_stm/stm/nursery.c
--- a/rpython/translator/stm/src_stm/stm/nursery.c
+++ b/rpython/translator/stm/src_stm/stm/nursery.c
@@ -424,6 +424,7 @@
     struct stm_undo_s *end = (struct stm_undo_s *)(list->items + list->count);
 
     for (; undo < end; undo++) {
+        /* this logic also works if type2 == TYPE_MODIFIED_HASHTABLE */
         if (undo->type == TYPE_POSITION_MARKER)
             minor_trace_if_young(&undo->marker_object);
     }
diff --git a/rpython/translator/stm/src_stm/stm/pages.h b/rpython/translator/stm/src_stm/stm/pages.h
--- a/rpython/translator/stm/src_stm/stm/pages.h
+++ b/rpython/translator/stm/src_stm/stm/pages.h
@@ -62,7 +62,11 @@
 
 static inline bool get_page_status_in(long segnum, uintptr_t pagenum)
 {
-    /* reading page status requires "read"-lock: */
+    /* reading page status requires "read"-lock, which is defined as
+       "any segment has the privatization_lock".  This is enough to
+       prevent the "write"-lock from being acquired by somebody else
+       (defined as "_all_ segments have the privatization_lock").
+    */
     assert(STM_PSEGMENT->privatization_lock);
 
     OPT_ASSERT(segnum < 8 * sizeof(struct page_shared_s));
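
(Hedged sketch, not part of the commit: the convention described above acts
like a reader/writer lock assembled from per-segment flags.  Using the
per-segment helpers seen elsewhere in this commit; the page-status setter
and PAGE_ACCESSIBLE are hypothetical:

    long i;

    /* "read" side: holding any one segment's lock is enough */
    acquire_privatization_lock(STM_SEGMENT->segment_num);
    int shared = get_page_status_in(segnum, pagenum) != PAGE_NO_ACCESS;
    release_privatization_lock(STM_SEGMENT->segment_num);

    /* "write" side: must hold every segment's lock, which excludes all
       readers (exact segment range elided) */
    for (i = 0; i < NB_SEGMENTS; i++)
        acquire_privatization_lock(i);
    set_page_status_in(segnum, pagenum, PAGE_ACCESSIBLE);  /* hypothetical */
    for (i = NB_SEGMENTS - 1; i >= 0; i--)
        release_privatization_lock(i);
)
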
diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c
--- a/rpython/translator/stm/src_stm/stm/setup.c
+++ b/rpython/translator/stm/src_stm/stm/setup.c
@@ -250,8 +250,6 @@
     set_gs_register(get_segment_base(num + 1));
     s_mutex_unlock();
 
-    DEBUG_EXPECT_SEGFAULT(true);
-
     if (num == 0) {
         dprintf(("STM_GC_NURSERY: %d\n", STM_GC_NURSERY));
         dprintf(("NB_PAGES: %d\n", NB_PAGES));
diff --git a/rpython/translator/stm/src_stm/stm/setup.h b/rpython/translator/stm/src_stm/stm/setup.h
--- a/rpython/translator/stm/src_stm/stm/setup.h
+++ b/rpython/translator/stm/src_stm/stm/setup.h
@@ -3,8 +3,8 @@
 static void setup_protection_settings(void);
 static pthread_t *_get_cpth(stm_thread_local_t *);
 #ifndef NDEBUG
-static __thread long _stm_segfault_expected = 0;
-#define DEBUG_EXPECT_SEGFAULT(v) do {if (v) _stm_segfault_expected++; else _stm_segfault_expected--;} while (0)
+static __thread long _stm_segfault_expected = 1;
+#define DEBUG_EXPECT_SEGFAULT(v) do {if (v) _stm_segfault_expected++; else _stm_segfault_expected--; assert(_stm_segfault_expected <= 1);} while (0)
 #else
 #define DEBUG_EXPECT_SEGFAULT(v) {}
 #endif
diff --git a/rpython/translator/stm/src_stm/stmgc.c b/rpython/translator/stm/src_stm/stmgc.c
--- a/rpython/translator/stm/src_stm/stmgc.c
+++ b/rpython/translator/stm/src_stm/stmgc.c
@@ -39,3 +39,4 @@
 #include "stm/prof.c"
 #include "stm/rewind_setjmp.c"
 #include "stm/finalizer.c"
+#include "stm/hashtable.c"
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -196,10 +196,13 @@
         STM_SEGMENT->transaction_read_version;
 }
 
+#define _STM_WRITE_CHECK_SLOWPATH(obj)  \
+    UNLIKELY(((obj)->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)
+
 __attribute__((always_inline))
 static inline void stm_write(object_t *obj)
 {
-    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0))
+    if (_STM_WRITE_CHECK_SLOWPATH(obj))
         _stm_write_slowpath(obj);
 }
 
@@ -208,7 +211,7 @@
 static inline void stm_write_card(object_t *obj, uintptr_t index)
 {
     /* if GCFLAG_WRITE_BARRIER is set, then don't do anything more. */
-    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)) {
+    if (_STM_WRITE_CHECK_SLOWPATH(obj)) {
 
         /* GCFLAG_WRITE_BARRIER is not set.  This might be because
            it's the first time we see a given small array; or it might
@@ -479,6 +482,38 @@
 /* dummies for now: */
 static inline void stm_flush_timing(stm_thread_local_t *tl, int verbose) {}
 
+
+/* Hashtables.  Keys are 64-bit unsigned integers, values are
+   'object_t *'.  Note that the type 'stm_hashtable_t' is not an
+   object type at all; you need to allocate and free it explicitly.
+   If you want to embed the hashtable inside an 'object_t' you
+   probably need a light finalizer to do the freeing. */
+typedef struct stm_hashtable_s stm_hashtable_t;
+typedef TLPREFIX struct stm_hashtable_entry_s stm_hashtable_entry_t;
+
+stm_hashtable_t *stm_hashtable_create(void);
+void stm_hashtable_free(stm_hashtable_t *);
+stm_hashtable_entry_t *stm_hashtable_lookup(object_t *, stm_hashtable_t *,
+                                            uintptr_t key);
+object_t *stm_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
+void stm_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
+                         object_t *nvalue, stm_thread_local_t *);
+void stm_hashtable_write_entry(object_t *hobj, stm_hashtable_entry_t *entry,
+                               object_t *nvalue);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+                        stm_hashtable_entry_t **results);
+extern uint32_t stm_hashtable_entry_userdata;
+void stm_hashtable_tracefn(struct object_s *, stm_hashtable_t *,
+                           void (object_t **));
+
+struct stm_hashtable_entry_s {
+    struct object_s header;
+    uint32_t userdata;
+    uintptr_t index;
+    object_t *object;
+};
+
 /* ==================== END ==================== */
 
 static void (*stmcb_expand_marker)(char *segment_base, uintptr_t odd_number,
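
A hedged usage sketch of the hashtable API above (not part of the commit;
transaction setup, the allocation of 'hobj' as the object_t wrapping the
table, and its light finalizer are elided, and 'some_value' and 'tl' are
assumed to exist):

    stm_hashtable_t *ht = stm_hashtable_create();

    stm_hashtable_write(hobj, ht, 42, some_value, tl); /* ht[42] = some_value */
    object_t *v = stm_hashtable_read(hobj, ht, 42);    /* also sets read markers */

    long upper = stm_hashtable_length_upper_bound(ht);
    stm_hashtable_entry_t *entries[upper];             /* VLA, illustration only */
    long n = stm_hashtable_list(hobj, ht, entries);    /* marks 'hobj' as read */

    /* ... once 'hobj' is unreachable, its finalizer calls: */
    stm_hashtable_free(ht);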

