[pypy-commit] stmgc default: Add stm_hashtable_iter*() functions. Contains a bit of delicate logic

arigo noreply at buildbot.pypy.org
Thu Nov 5 08:22:35 EST 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r1964:6d3eb55f21f9
Date: 2015-11-05 14:23 +0100
http://bitbucket.org/pypy/stmgc/changeset/6d3eb55f21f9/

Log:	Add stm_hashtable_iter*() functions. Contains a bit of delicate
	logic to avoid tracing tables repeatedly during minor collection,
	which I think should be correct but is probably not
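
	A minimal usage sketch of the new API (not part of the changeset;
	'hobj' and 'h' are an assumed caller's hashtable object and
	stm_hashtable_t, the stm_hashtable_iter* names come from the diff
	below):

	    struct stm_hashtable_table_s *table = stm_hashtable_iter(h);
	    stm_hashtable_entry_t **pp = NULL;   /* NULL: start from the beginning */
	    while ((pp = stm_hashtable_iter_next(hobj, table, pp)) != NULL) {
	        stm_hashtable_entry_t *entry = *pp;
	        /* entry->index is the key, entry->object the (non-NULL) value */
	    }
	    /* if 'table' is stored inside a GC-managed object, that object's
	       trace callback must call stm_hashtable_iter_tracefn(table, trace)
	       so the table stays usable across collections */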

diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -224,6 +224,9 @@
    version and thus don't need tracing. */
 static struct list_s *marked_objects_to_trace;
 
+/* a list of hobj/hashtable pairs for all hashtables seen */
+static struct list_s *all_hashtables_seen = NULL;
+
 /* we use the sharing seg0's pages for the GCFLAG_VISITED flag */
 
 static inline struct object_s *mark_loc(object_t *obj)
@@ -301,8 +304,6 @@
 }
 
 
-#define TRACE_FOR_MAJOR_COLLECTION  (&mark_record_trace)
-
 static void mark_and_trace(
     object_t *obj,
     char *segment_base, /* to trace obj in */
@@ -791,6 +792,7 @@
 
     /* marking */
     LIST_CREATE(marked_objects_to_trace);
+    LIST_CREATE(all_hashtables_seen);
     mark_visit_from_modified_objects();
     mark_visit_from_markers();
     mark_visit_from_roots();
@@ -815,6 +817,10 @@
     sweep_large_objects();
     sweep_small_objects();
 
+    /* hashtables */
+    stm_compact_hashtables();
+    LIST_FREE(all_hashtables_seen);
+
     dprintf((" | used after collection:  %ld\n",
              (long)pages_ctl.total_allocated));
     dprintf((" `----------------------------------------------\n"));
diff --git a/c8/stm/hashtable.c b/c8/stm/hashtable.c
--- a/c8/stm/hashtable.c
+++ b/c8/stm/hashtable.c
@@ -49,8 +49,12 @@
 #define PERTURB_SHIFT            5
 #define RESIZING_LOCK            0
 
-typedef struct {
-    uintptr_t mask;
+#define TRACE_FLAG_OFF              0
+#define TRACE_FLAG_ONCE             1
+#define TRACE_FLAG_KEEPALIVE        2
+
+typedef struct stm_hashtable_table_s {
+    uintptr_t mask;      /* 'mask' is always immutable. */
 
     /* 'resize_counter' starts at an odd value, and is decremented (by
        6) for every new item put in 'items'.  When it crosses 0, we
@@ -63,6 +67,8 @@
     */
     uintptr_t resize_counter;
 
+    uint8_t trace_flag;
+
     stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
 } stm_hashtable_table_t;
 
@@ -79,6 +85,7 @@
 {
     table->mask = itemcount - 1;
     table->resize_counter = itemcount * 4 + 1;
+    table->trace_flag = TRACE_FLAG_OFF;
     memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *));
 }
 
@@ -162,6 +169,7 @@
     assert(biggertable);   // XXX
 
     stm_hashtable_table_t *table = hashtable->table;
+    table->trace_flag = TRACE_FLAG_ONCE;
     table->resize_counter = (uintptr_t)biggertable;
     /* ^^^ this unlocks the table by writing a non-zero value to
        table->resize_counter, but the new value is a pointer to the
@@ -485,6 +493,41 @@
 static void _stm_compact_hashtable(struct object_s *hobj,
                                    stm_hashtable_t *hashtable)
 {
+    /* Walk the chained list that starts at 'hashtable->initial_table'
+       and follows the 'resize_counter' fields.  Remove all tables
+       except (1) the initial one, (2) the most recent one, and (3)
+       the ones on which stm_hashtable_iter_tracefn() was called.
+    */
+    stm_hashtable_table_t *most_recent_table = hashtable->table;
+    assert(!IS_EVEN(most_recent_table->resize_counter));
+    /* set the "don't free me" flag on the most recent table */
+    most_recent_table->trace_flag = TRACE_FLAG_KEEPALIVE;
+
+    stm_hashtable_table_t *known_alive = &hashtable->initial_table;
+    known_alive->trace_flag = TRACE_FLAG_OFF;
+    /* a KEEPALIVE flag is ignored on the initial table: it is never
+       individually freed anyway */
+
+    while (known_alive != most_recent_table) {
+        uintptr_t rc = known_alive->resize_counter;
+        assert(IS_EVEN(rc));
+        assert(rc != RESIZING_LOCK);
+
+        stm_hashtable_table_t *next_table = (stm_hashtable_table_t *)rc;
+        if (next_table->trace_flag != TRACE_FLAG_KEEPALIVE) {
+            /* free this next table and relink the chained list to skip it */
+            assert(IS_EVEN(next_table->resize_counter));
+            known_alive->resize_counter = next_table->resize_counter;
+            free(next_table);
+        }
+        else {
+            /* this next table is kept alive */
+            known_alive = next_table;
+            known_alive->trace_flag = TRACE_FLAG_OFF;
+        }
+    }
+    /* first part done */
+
     stm_hashtable_table_t *table = hashtable->table;
     uintptr_t rc = table->resize_counter;
     assert(!IS_EVEN(rc));
@@ -515,35 +558,24 @@
         dprintf(("compact with %ld items:\n", num_entries_times_6 / 6));
         _stm_rehash_hashtable(hashtable, count, segnum);
     }
+}
 
-    table = hashtable->table;
-    assert(!IS_EVEN(table->resize_counter));
-
-    if (table != &hashtable->initial_table) {
-        uintptr_t rc = hashtable->initial_table.resize_counter;
-        while (1) {
-            assert(IS_EVEN(rc));
-            assert(rc != RESIZING_LOCK);
-
-            stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc;
-            if (old_table == table)
-                break;
-            rc = old_table->resize_counter;
-            free(old_table);
-        }
-        hashtable->initial_table.resize_counter = (uintptr_t)table;
-        assert(IS_EVEN(hashtable->initial_table.resize_counter));
+static void stm_compact_hashtables(void)
+{
+    uintptr_t i = all_hashtables_seen->count;
+    while (i > 0) {
+        i -= 2;
+        _stm_compact_hashtable(
+            (struct object_s *)all_hashtables_seen->items[i],
+            (stm_hashtable_t *)all_hashtables_seen->items[i + 1]);
     }
 }
 
-void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
-                           void trace(object_t **))
+static void _hashtable_tracefn(stm_hashtable_table_t *table,
+                               void trace(object_t **))
 {
-    if (trace == TRACE_FOR_MAJOR_COLLECTION)
-        _stm_compact_hashtable(hobj, hashtable);
-
-    stm_hashtable_table_t *table;
-    table = VOLATILE_HASHTABLE(hashtable)->table;
+    if (table->trace_flag == TRACE_FLAG_ONCE)
+        table->trace_flag = TRACE_FLAG_OFF;
 
     uintptr_t j, mask = table->mask;
     for (j = 0; j <= mask; j++) {
@@ -554,3 +586,105 @@
         }
     }
 }
+
+void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
+                           void trace(object_t **))
+{
+    if (all_hashtables_seen != NULL)
+        all_hashtables_seen = list_append2(all_hashtables_seen,
+                                           (uintptr_t)hobj,
+                                           (uintptr_t)hashtable);
+
+    _hashtable_tracefn(VOLATILE_HASHTABLE(hashtable)->table, trace);
+}
+
+
+/* Hashtable iterators */
+
+/* TRACE_FLAG_ONCE: the table must be traced once if it supports an iterator
+   TRACE_FLAG_OFF: the table is the most recent table, or has already been
+       traced once
+   TRACE_FLAG_KEEPALIVE: used during major collections only, to mark tables
+       that must be kept alive because there are iterators
+*/
+
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *hashtable)
+{
+    /* Get the table.  No synchronization is needed: we may miss some
+       entries that are being added, but they would contain NULL in
+       this segment anyway. */
+    return VOLATILE_HASHTABLE(hashtable)->table;
+}
+
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous)
+{
+    /* Set the read marker on hobj for every item, in case we have
+       transaction breaks in-between.
+    */
+    stm_read(hobj);
+
+    /* Get the bounds of the part of the 'stm_hashtable_entry_t *' array
+       that we have to check */
+    stm_hashtable_entry_t **pp, **last;
+    if (previous == NULL)
+        pp = table->items;
+    else
+        pp = previous + 1;
+    last = table->items + table->mask;
+
+    /* Find the first non-null entry */
+    stm_hashtable_entry_t *entry;
+
+    while (pp <= last) {
+        entry = *(stm_hashtable_entry_t *volatile *)pp;
+        if (entry != NULL) {
+            stm_read((object_t *)entry);
+            if (entry->object != NULL) {
+                //fprintf(stderr, "stm_hashtable_iter_next(%p, %p, %p) = %p\n",
+                //        hobj, table, previous, pp);
+                return pp;
+            }
+        }
+        ++pp;
+    }
+    //fprintf(stderr, "stm_hashtable_iter_next(%p, %p, %p) = %p\n",
+    //        hobj, table, previous, NULL);
+    return NULL;
+}
+
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **))
+{
+    if (all_hashtables_seen == NULL) {   /* for minor collections */
+
+        /* During minor collection, tracing the table is only required
+           the first time: if it contains young objects, they must be
+           kept alive and have their address updated.  We use
+           TRACE_FLAG_ONCE to know that.  We don't need to do it if
+           our 'table' is the latest version, because in that case it
+           will be done by stm_hashtable_tracefn().  That's why
+           TRACE_FLAG_ONCE is only set when a more recent table is
+           attached.
+
+           It is only needed once: non-latest-version tables are
+           immutable.  We mark all the entries as old once, and
+           then these now-old objects stay alive until the next
+           major collection.
+
+           Checking the flag can be done without synchronization: it is
+           never wrong to call _hashtable_tracefn() too often, and the
+           only case where it *has to* be called occurs if the
+           hashtable object is still young (and not seen by other
+           threads).
+        */
+        if (table->trace_flag == TRACE_FLAG_ONCE)
+            _hashtable_tracefn(table, trace);
+    }
+    else {       /* for major collections */
+
+        /* Set this flag for _stm_compact_hashtable() */
+        table->trace_flag = TRACE_FLAG_KEEPALIVE;
+    }
+}
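
For reference, a minimal sketch (not part of the diff; 'find_most_recent'
is an illustrative name) of the chained-table encoding that the compaction
code above relies on: 'resize_counter' holds either an odd counter, or an
even value that is really a pointer to the next, more recent table.

    static stm_hashtable_table_t *find_most_recent(stm_hashtable_t *hashtable)
    {
        stm_hashtable_table_t *t = &hashtable->initial_table;
        uintptr_t rc = t->resize_counter;
        while (IS_EVEN(rc)) {
            /* an even value is really a pointer to the next, newer table;
               assumes no resize is in progress (rc != RESIZING_LOCK),
               which holds during major collection */
            t = (stm_hashtable_table_t *)rc;
            rc = t->resize_counter;
        }
        return t;   /* an odd value marks the most recent table */
    }
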
diff --git a/c8/stm/hashtable.h b/c8/stm/hashtable.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/hashtable.h
@@ -0,0 +1,2 @@
+
+static void stm_compact_hashtables(void);
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -19,6 +19,7 @@
 #include "stm/finalizer.h"
 #include "stm/locks.h"
 #include "stm/detach.h"
+#include "stm/hashtable.h"
 #include "stm/queue.h"
 
 #include "stm/misc.c"
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -767,6 +767,18 @@
     object_t *object;
 };
 
+/* Hashtable iterators.  You get a raw 'table' pointer when you make an
+   iterator, which you pass to stm_hashtable_iter_next().  When the GC
+   traces, you must keep the table pointer alive with
+   stm_hashtable_iter_tracefn().  Iteration may or may not return items
+   added after stm_hashtable_iter() was called. */
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous);
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **));
+
 
 /* Queues.  The items you put() and get() back are in random order.
    Like hashtables, the type 'stm_queue_t' is not an object type at
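
A sketch of the tracing contract just described, mirroring the 'myiter_s'
object in the test support code further down (the struct and function
names here are illustrative only):

    struct iter_obj_s {                      /* hypothetical user object */
        struct object_s hdr;
        struct stm_hashtable_table_s *table; /* from stm_hashtable_iter() */
        stm_hashtable_entry_t **previous;    /* last result of iter_next() */
    };

    static void trace_iter_obj(struct object_s *obj, void trace(object_t **))
    {
        struct iter_obj_s *it = (struct iter_obj_s *)obj;
        /* tells the GC to keep this table alive (major collections) and
           to trace its entries once if needed (minor collections) */
        stm_hashtable_iter_tracefn(it->table, trace);
    }
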
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -222,6 +222,15 @@
 long _stm_hashtable_list(object_t *o, stm_hashtable_t *h,
                          object_t *entries);
 
+object_t *_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t *_hashtable_iter_next(object_t *, object_t *);
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous);
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **));
+
 void _set_hashtable(object_t *obj, stm_hashtable_t *h);
 stm_hashtable_t *_get_hashtable(object_t *obj);
 uintptr_t _get_entry_index(stm_hashtable_entry_t *entry);
@@ -419,6 +428,37 @@
     return stm_hashtable_list(o, h, NULL);
 }
 
+struct myiter_s {
+    struct myobj_s common;
+    struct stm_hashtable_table_s *table;
+    stm_hashtable_entry_t **previous;
+};
+typedef TLPREFIX struct myiter_s myiter_t;
+
+object_t *_hashtable_iter(stm_hashtable_t *h)
+{
+    myiter_t *iter = (myiter_t *)stm_allocate(sizeof(myiter_t));
+    _set_type_id(&iter->common.hdr, 421416);
+    iter->table = stm_hashtable_iter(h);
+    iter->previous = NULL;
+    return (object_t *)iter;
+}
+
+stm_hashtable_entry_t *_hashtable_iter_next(object_t *hobj, object_t *iterobj)
+{
+    stm_write(iterobj);
+    myiter_t *iter = (myiter_t *)iterobj;
+    assert(iter->common.type_id == 421416);
+    stm_hashtable_entry_t **pentry;
+    pentry = stm_hashtable_iter_next(hobj, iter->table, iter->previous);
+    if (pentry == NULL)
+        return NULL;
+    iter->previous = pentry;
+    //fprintf(stderr, "\tcontaining %p: index=%ld, object=%p\n",
+    //        *pentry, (*pentry)->index, (*pentry)->object);
+    return *pentry;
+}
+
 
 void _set_queue(object_t *obj, stm_queue_t *q)
 {
@@ -478,6 +518,9 @@
         if (myobj->type_id == 421417) {    /* queue */
             return sizeof(struct myobj_s) + 1 * sizeof(void*);
         }
+        if (myobj->type_id == 421416) {    /* hashtable iterator */
+            return sizeof(struct myobj_s) + 2 * sizeof(void*);
+        }
         /* basic case: tid equals 42 plus the size of the object */
         assert(myobj->type_id >= 42 + sizeof(struct myobj_s));
         assert((myobj->type_id - 42) >= 16);
@@ -514,6 +557,13 @@
         stm_queue_tracefn(q, visit);
         return;
     }
+    if (myobj->type_id == 421416) {
+        /* hashtable iterator */
+        struct stm_hashtable_table_s *table;
+        table = *(struct stm_hashtable_table_s **)(myobj + 1);
+        stm_hashtable_iter_tracefn(table, visit);
+        return;
+    }
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
@@ -538,6 +588,7 @@
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     assert(myobj->type_id != 421417);
+    assert(myobj->type_id != 421416);
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
@@ -557,6 +608,7 @@
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     assert(myobj->type_id != 421417);
+    assert(myobj->type_id != 421416);
     if (myobj->type_id < 421420) {
         offset_itemsize[0] = SIZEOF_MYOBJ;
         offset_itemsize[1] = 1;
@@ -699,6 +751,12 @@
     lib._set_hashtable(o, h)
     return o
 
+def hashtable_iter(hobj):
+    return lib._hashtable_iter(get_hashtable(hobj))
+
+def hashtable_iter_next(hobj, o):
+    return lib._hashtable_iter_next(hobj, o)
+
 def hashtable_lookup(hto, ht, idx):
     return ffi.cast("object_t*",
                     lib.stm_hashtable_lookup(hto, ht, idx))
diff --git a/c8/test/test_hashtable.py b/c8/test/test_hashtable.py
--- a/c8/test/test_hashtable.py
+++ b/c8/test/test_hashtable.py
@@ -455,8 +455,96 @@
         stm_major_collect()
         self.commit_transaction()
 
+    def test_iter_direct(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        table = lib.stm_hashtable_iter(get_hashtable(h))
+        previous = ffi.NULL
+        seen = []
+        while True:
+            next_pentry = lib.stm_hashtable_iter_next(h, table, previous)
+            if next_pentry == ffi.NULL:
+                break
+            previous = next_pentry
+            entry = next_pentry[0]
+            seen.append((lib._get_entry_index(entry),
+                         lib._get_entry_object(entry)))
+        assert len(seen) == 29
+        assert sorted(seen) == sorted(expected)
 
+    def test_iter_object(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        iterobj = hashtable_iter(h)
+        seen = []
+        while True:
+            entry = hashtable_iter_next(h, iterobj)
+            if entry == ffi.NULL:
+                break
+            seen.append((lib._get_entry_index(entry),
+                         lib._get_entry_object(entry)))
+        assert len(seen) == 29
+        assert sorted(seen) == sorted(expected)
 
+    def test_iter_collections(self):
+        def advance(iterobj, seen):
+            entry = hashtable_iter_next(h, iterobj)
+            if entry == ffi.NULL:
+                seen.add('DONE')
+            else:
+                item = (lib._get_entry_index(entry),
+                        lib._get_entry_object(entry))
+                assert htget(h, item[0]) == item[1]
+                assert item[0] not in seen
+                seen.add(item[0])
+
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        iterators = []
+        for i in range(250):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            iterators.append((hashtable_iter(h), i, set()))
+
+            iterobj, _, seen = random.choice(iterators)
+            if 'DONE' not in seen:
+                advance(iterobj, seen)
+
+            self.push_root(h)
+            for iterobj, _, _ in iterators:
+                self.push_root(iterobj)
+            if i % 49 == 48:
+                stm_major_collect()
+            elif i % 7 == 6:
+                stm_minor_collect()
+            for k, prev in reversed(list(enumerate(iterators))):
+                iterators[k] = (self.pop_root(),) + prev[1:]
+            h = self.pop_root()
+
+        for iterobj, _, seen in iterators:
+            while 'DONE' not in seen:
+                advance(iterobj, seen)
+
+        maximum = set([j ^ 19 for j in range(250)])
+        maximum.add('DONE')
+        for iterobj, i, seen in iterators:
+            assert seen.issubset(maximum)
+            for j in range(i):
+                assert j ^ 19 in seen
+            # we may also have seen more items added later
 
 
 class TestRandomHashtable(BaseTestHashtable):

