[pypy-commit] stmgc default: Add stm_hashtable_iter*() functions. Contains a bit of delicate logic
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r1964:6d3eb55f21f9
Date: 2015-11-05 14:23 +0100
http://bitbucket.org/pypy/stmgc/changeset/6d3eb55f21f9/
Log:	Add stm_hashtable_iter*() functions. Contains a bit of delicate
	logic to avoid tracing tables repeatedly during minor collection,
	which I think should be correct but is probably not.
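
For orientation, here is a minimal usage sketch of the new iterator API (not part of the patch; 'process_entry' is a placeholder for application code, and the GC hook stm_hashtable_iter_tracefn(), declared in stmgc.h below, is left out):

    /* Sketch: iterate over the hashtable 'h' owned by the GC object
       'hobj'.  process_entry() is a placeholder, not a stmgc API. */
    struct stm_hashtable_table_s *table = stm_hashtable_iter(h);
    stm_hashtable_entry_t **pp = NULL;
    while ((pp = stm_hashtable_iter_next(hobj, table, pp)) != NULL) {
        stm_hashtable_entry_t *entry = *pp;   /* entry->object != NULL here */
        process_entry(entry->index, entry->object);
    }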
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -224,6 +224,9 @@
   version and thus don't need tracing. */
static struct list_s *marked_objects_to_trace;
+/* a list of hobj/hashtable pairs for all hashtables seen */
+static struct list_s *all_hashtables_seen = NULL;
+
/* we use the sharing seg0's pages for the GCFLAG_VISITED flag */
static inline struct object_s *mark_loc(object_t *obj)
@@ -301,8 +304,6 @@
}
-#define TRACE_FOR_MAJOR_COLLECTION (&mark_record_trace)
-
 static void mark_and_trace(
     object_t *obj,
     char *segment_base, /* to trace obj in */
@@ -791,6 +792,7 @@
     /* marking */
     LIST_CREATE(marked_objects_to_trace);
+    LIST_CREATE(all_hashtables_seen);
     mark_visit_from_modified_objects();
     mark_visit_from_markers();
     mark_visit_from_roots();
@@ -815,6 +817,10 @@
     sweep_large_objects();
     sweep_small_objects();
+    /* hashtables */
+    stm_compact_hashtables();
+    LIST_FREE(all_hashtables_seen);
+
     dprintf((" | used after collection:  %ld\n",
              (long)pages_ctl.total_allocated));
     dprintf((" `----------------------------------------------\n"));
diff --git a/c8/stm/hashtable.c b/c8/stm/hashtable.c
--- a/c8/stm/hashtable.c
+++ b/c8/stm/hashtable.c
@@ -49,8 +49,12 @@
 #define PERTURB_SHIFT        5
 #define RESIZING_LOCK        0
-typedef struct {
-    uintptr_t mask;
+#define TRACE_FLAG_OFF        0
+#define TRACE_FLAG_ONCE       1
+#define TRACE_FLAG_KEEPALIVE  2
+
+typedef struct stm_hashtable_table_s {
+    uintptr_t mask;      /* 'mask' is always immutable. */

     /* 'resize_counter' starts at an odd value, and is decremented (by
        6) for every new item put in 'items'.  When it crosses 0, we
@@ -63,6 +67,8 @@
     */
     uintptr_t resize_counter;
+    uint8_t trace_flag;
+
     stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
 } stm_hashtable_table_t;
@@ -79,6 +85,7 @@
 {
     table->mask = itemcount - 1;
     table->resize_counter = itemcount * 4 + 1;
+    table->trace_flag = TRACE_FLAG_OFF;
     memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *));
 }
@@ -162,6 +169,7 @@
     assert(biggertable);   // XXX
     stm_hashtable_table_t *table = hashtable->table;
+    table->trace_flag = TRACE_FLAG_ONCE;
     table->resize_counter = (uintptr_t)biggertable;
     /* ^^^ this unlocks the table by writing a non-zero value to
        table->resize_counter, but the new value is a pointer to the
@@ -485,6 +493,41 @@
 static void _stm_compact_hashtable(struct object_s *hobj,
                                    stm_hashtable_t *hashtable)
 {
+    /* Walk the chained list that starts at 'hashtable->initial_table'
+       and follows the 'resize_counter' fields.  Remove all tables
+       except (1) the initial one, (2) the most recent one, and (3)
+       the ones on which stm_hashtable_iter_tracefn() was called.
+    */
+    stm_hashtable_table_t *most_recent_table = hashtable->table;
+    assert(!IS_EVEN(most_recent_table->resize_counter));
+    /* set the "don't free me" flag on the most recent table */
+    most_recent_table->trace_flag = TRACE_FLAG_KEEPALIVE;
+
+    stm_hashtable_table_t *known_alive = &hashtable->initial_table;
+    known_alive->trace_flag = TRACE_FLAG_OFF;
+    /* a KEEPALIVE flag is ignored on the initial table: it is never
+       individually freed anyway */
+
+    while (known_alive != most_recent_table) {
+        uintptr_t rc = known_alive->resize_counter;
+        assert(IS_EVEN(rc));
+        assert(rc != RESIZING_LOCK);
+
+        stm_hashtable_table_t *next_table = (stm_hashtable_table_t *)rc;
+        if (next_table->trace_flag != TRACE_FLAG_KEEPALIVE) {
+            /* free this next table and relink the chained list to skip it */
+            assert(IS_EVEN(next_table->resize_counter));
+            known_alive->resize_counter = next_table->resize_counter;
+            free(next_table);
+        }
+        else {
+            /* this next table is kept alive */
+            known_alive = next_table;
+            known_alive->trace_flag = TRACE_FLAG_OFF;
+        }
+    }
+    /* done with the first part */
+
     stm_hashtable_table_t *table = hashtable->table;
     uintptr_t rc = table->resize_counter;
     assert(!IS_EVEN(rc));
@@ -515,35 +558,24 @@
         dprintf(("compact with %ld items:\n", num_entries_times_6 / 6));
         _stm_rehash_hashtable(hashtable, count, segnum);
     }
+}
-    table = hashtable->table;
-    assert(!IS_EVEN(table->resize_counter));
-
-    if (table != &hashtable->initial_table) {
-        uintptr_t rc = hashtable->initial_table.resize_counter;
-        while (1) {
-            assert(IS_EVEN(rc));
-            assert(rc != RESIZING_LOCK);
-
-            stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc;
-            if (old_table == table)
-                break;
-            rc = old_table->resize_counter;
-            free(old_table);
-        }
-        hashtable->initial_table.resize_counter = (uintptr_t)table;
-        assert(IS_EVEN(hashtable->initial_table.resize_counter));
+static void stm_compact_hashtables(void)
+{
+    uintptr_t i = all_hashtables_seen->count;
+    while (i > 0) {
+        i -= 2;
+        _stm_compact_hashtable(
+            (struct object_s *)all_hashtables_seen->items[i],
+            (stm_hashtable_t *)all_hashtables_seen->items[i + 1]);
     }
 }
-void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
-                           void trace(object_t **))
+static void _hashtable_tracefn(stm_hashtable_table_t *table,
+                               void trace(object_t **))
 {
-    if (trace == TRACE_FOR_MAJOR_COLLECTION)
-        _stm_compact_hashtable(hobj, hashtable);
-
-    stm_hashtable_table_t *table;
-    table = VOLATILE_HASHTABLE(hashtable)->table;
+    if (table->trace_flag == TRACE_FLAG_ONCE)
+        table->trace_flag = TRACE_FLAG_OFF;
     uintptr_t j, mask = table->mask;
     for (j = 0; j <= mask; j++) {
@@ -554,3 +586,105 @@
         }
     }
 }
+
+void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
+                           void trace(object_t **))
+{
+    if (all_hashtables_seen != NULL)
+        all_hashtables_seen = list_append2(all_hashtables_seen,
+                                           (uintptr_t)hobj,
+                                           (uintptr_t)hashtable);
+
+    _hashtable_tracefn(VOLATILE_HASHTABLE(hashtable)->table, trace);
+}
+
+
+/* Hashtable iterators */
+
+/* TRACE_FLAG_ONCE: the table must be traced once if it supports an iterator
+   TRACE_FLAG_OFF: the table is the most recent table, or has already
+       been traced once
+   TRACE_FLAG_KEEPALIVE: during major collections only, marks tables that
+       must be kept alive because iterators reference them
+*/
+
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *hashtable)
+{
+    /* Get the table.  No synchronization is needed: we may miss some
+       entries that are being added, but they would contain NULL in
+       this segment anyway. */
+    return VOLATILE_HASHTABLE(hashtable)->table;
+}
+
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous)
+{
+    /* Set the read marker on hobj for every item, in case we have
+       transaction breaks in-between.
+    */
+    stm_read(hobj);
+
+    /* Get the bounds of the part of the 'stm_hashtable_entry_t *' array
+       that we have to check */
+    stm_hashtable_entry_t **pp, **last;
+    if (previous == NULL)
+        pp = table->items;
+    else
+        pp = previous + 1;
+    last = table->items + table->mask;
+
+    /* Find the first non-null entry */
+    stm_hashtable_entry_t *entry;
+
+    while (pp <= last) {
+        entry = *(stm_hashtable_entry_t *volatile *)pp;
+        if (entry != NULL) {
+            stm_read((object_t *)entry);
+            if (entry->object != NULL) {
+                //fprintf(stderr, "stm_hashtable_iter_next(%p, %p, %p) = %p\n",
+                //        hobj, table, previous, pp);
+                return pp;
+            }
+        }
+        ++pp;
+    }
+    //fprintf(stderr, "stm_hashtable_iter_next(%p, %p, %p) = %p\n",
+    //        hobj, table, previous, NULL);
+    return NULL;
+}
+
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **))
+{
+    if (all_hashtables_seen == NULL) {   /* for minor collections */
+
+        /* During minor collection, tracing the table is only required
+           the first time: if it contains young objects, they must be
+           kept alive and have their address updated.  We use
+           TRACE_FLAG_ONCE to know that.  We don't need to do it if
+           our 'table' is the latest version, because in that case it
+           will be done by stm_hashtable_tracefn().  That's why
+           TRACE_FLAG_ONCE is only set when a more recent table is
+           attached.
+
+           It is only needed once: non-latest-version tables are
+           immutable.  We mark all the entries as old once, and
+           then these now-old objects stay alive until the next
+           major collection.
+
+           Checking the flag can be done without synchronization: it
+           is never wrong to call _hashtable_tracefn() too much, and
+           the only case where it *has to* be called occurs if the
+           hashtable object is still young (and not seen by other
+           threads).
+        */
+        if (table->trace_flag == TRACE_FLAG_ONCE)
+            _hashtable_tracefn(table, trace);
+    }
+    else {   /* for major collections */
+
+        /* Set this flag for _stm_compact_hashtable() */
+        table->trace_flag = TRACE_FLAG_KEEPALIVE;
+    }
+}
diff --git a/c8/stm/hashtable.h b/c8/stm/hashtable.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/hashtable.h
@@ -0,0 +1,2 @@
+
+static void stm_compact_hashtables(void);
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -19,6 +19,7 @@
#include "stm/finalizer.h"
#include "stm/locks.h"
#include "stm/detach.h"
+#include "stm/hashtable.h"
#include "stm/queue.h"
#include "stm/misc.c"
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -767,6 +767,18 @@
     object_t *object;
 };

+/* Hashtable iterators.  You get a raw 'table' pointer when you make
+   an iterator, which you pass to stm_hashtable_iter_next().  When the
+   GC traces, you must keep the table pointer alive with
+   stm_hashtable_iter_tracefn().  Iteration may or may not return
+   items added after stm_hashtable_iter() was called. */
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous);
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **));
+
 /* Queues.  The items you put() and get() back are in random order.
    Like hashtables, the type 'stm_queue_t' is not an object type at
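
To illustrate the tracing contract above: any GC-managed object that stores the raw 'table' pointer must forward it to stm_hashtable_iter_tracefn() from its trace callback. A minimal sketch with illustrative names ('my_iter_s' and 'trace_my_iter' are not part of stmgc); the test support code below does the same for its iterator objects:

    struct my_iter_s {
        struct stm_hashtable_table_s *table;   /* from stm_hashtable_iter() */
        stm_hashtable_entry_t **previous;      /* cursor for iter_next() */
    };

    static void trace_my_iter(struct my_iter_s *it, void trace(object_t **))
    {
        /* keeps 'it->table' alive across a major collection; in a minor
           collection, traces a non-latest table once (TRACE_FLAG_ONCE) */
        stm_hashtable_iter_tracefn(it->table, trace);
    }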
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -222,6 +222,15 @@
 long _stm_hashtable_list(object_t *o, stm_hashtable_t *h,
                          object_t *entries);
+object_t *_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t *_hashtable_iter_next(object_t *, object_t *);
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous);
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **));
+
void _set_hashtable(object_t *obj, stm_hashtable_t *h);
stm_hashtable_t *_get_hashtable(object_t *obj);
uintptr_t _get_entry_index(stm_hashtable_entry_t *entry);
@@ -419,6 +428,37 @@
     return stm_hashtable_list(o, h, NULL);
 }

+struct myiter_s {
+    struct myobj_s common;
+    struct stm_hashtable_table_s *table;
+    stm_hashtable_entry_t **previous;
+};
+typedef TLPREFIX struct myiter_s myiter_t;
+
+object_t *_hashtable_iter(stm_hashtable_t *h)
+{
+    myiter_t *iter = (myiter_t *)stm_allocate(sizeof(myiter_t));
+    _set_type_id(&iter->common.hdr, 421416);
+    iter->table = stm_hashtable_iter(h);
+    iter->previous = NULL;
+    return (object_t *)iter;
+}
+
+stm_hashtable_entry_t *_hashtable_iter_next(object_t *hobj, object_t *iterobj)
+{
+    stm_write(iterobj);
+    myiter_t *iter = (myiter_t *)iterobj;
+    assert(iter->common.type_id == 421416);
+    stm_hashtable_entry_t **pentry;
+    pentry = stm_hashtable_iter_next(hobj, iter->table, iter->previous);
+    if (pentry == NULL)
+        return NULL;
+    iter->previous = pentry;
+    //fprintf(stderr, "\tcontaining %p: index=%ld, object=%p\n",
+    //        *pentry, (*pentry)->index, (*pentry)->object);
+    return *pentry;
+}
+
void _set_queue(object_t *obj, stm_queue_t *q)
{
@@ -478,6 +518,9 @@
     if (myobj->type_id == 421417) {    /* queue */
         return sizeof(struct myobj_s) + 1 * sizeof(void*);
     }
+    if (myobj->type_id == 421416) {    /* hashtable iterator */
+        return sizeof(struct myobj_s) + 2 * sizeof(void*);
+    }
     /* basic case: tid equals 42 plus the size of the object */
     assert(myobj->type_id >= 42 + sizeof(struct myobj_s));
     assert((myobj->type_id - 42) >= 16);
@@ -514,6 +557,13 @@
         stm_queue_tracefn(q, visit);
         return;
     }
+    if (myobj->type_id == 421416) {
+        /* hashtable iterator */
+        struct stm_hashtable_table_s *table;
+        table = *(struct stm_hashtable_table_s **)(myobj + 1);
+        stm_hashtable_iter_tracefn(table, visit);
+        return;
+    }
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
@@ -538,6 +588,7 @@
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     assert(myobj->type_id != 421417);
+    assert(myobj->type_id != 421416);
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
@@ -557,6 +608,7 @@
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     assert(myobj->type_id != 421417);
+    assert(myobj->type_id != 421416);
     if (myobj->type_id < 421420) {
         offset_itemsize[0] = SIZEOF_MYOBJ;
         offset_itemsize[1] = 1;
@@ -699,6 +751,12 @@
     lib._set_hashtable(o, h)
     return o

+def hashtable_iter(hobj):
+    return lib._hashtable_iter(get_hashtable(hobj))
+
+def hashtable_iter_next(hobj, o):
+    return lib._hashtable_iter_next(hobj, o)
+
 def hashtable_lookup(hto, ht, idx):
     return ffi.cast("object_t*",
                     lib.stm_hashtable_lookup(hto, ht, idx))
diff --git a/c8/test/test_hashtable.py b/c8/test/test_hashtable.py
--- a/c8/test/test_hashtable.py
+++ b/c8/test/test_hashtable.py
@@ -455,8 +455,96 @@
             stm_major_collect()
         self.commit_transaction()

+    def test_iter_direct(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        table = lib.stm_hashtable_iter(get_hashtable(h))
+        previous = ffi.NULL
+        seen = []
+        while True:
+            next_pentry = lib.stm_hashtable_iter_next(h, table, previous)
+            if next_pentry == ffi.NULL:
+                break
+            previous = next_pentry
+            entry = next_pentry[0]
+            seen.append((lib._get_entry_index(entry),
+                         lib._get_entry_object(entry)))
+        assert len(seen) == 29
+        assert sorted(seen) == sorted(expected)
+
+    def test_iter_object(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        iterobj = hashtable_iter(h)
+        seen = []
+        while True:
+            entry = hashtable_iter_next(h, iterobj)
+            if entry == ffi.NULL:
+                break
+            seen.append((lib._get_entry_index(entry),
+                         lib._get_entry_object(entry)))
+        assert len(seen) == 29
+        assert sorted(seen) == sorted(expected)
+
+    def test_iter_collections(self):
+        def advance(iterobj, seen):
+            entry = hashtable_iter_next(h, iterobj)
+            if entry == ffi.NULL:
+                seen.add('DONE')
+            else:
+                item = (lib._get_entry_index(entry),
+                        lib._get_entry_object(entry))
+                assert htget(h, item[0]) == item[1]
+                assert item[0] not in seen
+                seen.add(item[0])
+
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        iterators = []
+        for i in range(250):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            iterators.append((hashtable_iter(h), i, set()))
+
+            iterobj, _, seen = random.choice(iterators)
+            if 'DONE' not in seen:
+                advance(iterobj, seen)
+
+            self.push_root(h)
+            for iterobj, _, _ in iterators:
+                self.push_root(iterobj)
+            if i % 49 == 48:
+                stm_major_collect()
+            elif i % 7 == 6:
+                stm_minor_collect()
+            for i, prev in reversed(list(enumerate(iterators))):
+                iterators[i] = (self.pop_root(),) + prev[1:]
+            h = self.pop_root()
+
+        for iterobj, _, seen in iterators:
+            while 'DONE' not in seen:
+                advance(iterobj, seen)
+
+        maximum = set([j ^ 19 for j in range(250)])
+        maximum.add('DONE')
+        for iterobj, i, seen in iterators:
+            assert seen.issubset(maximum)
+            for j in range(i):
+                assert j ^ 19 in seen
+            # we may also have seen more items added later
+
class TestRandomHashtable(BaseTestHashtable):