[pypy-commit] stmgc default: add hash/id/shadow
Raemi
noreply at buildbot.pypy.org
Tue Sep 9 11:33:13 CEST 2014
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch:
Changeset: r1374:30234e0f06ae
Date: 2014-09-09 11:07 +0200
http://bitbucket.org/pypy/stmgc/changeset/30234e0f06ae/
Log: add hash/id/shadow
diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -326,6 +326,7 @@
assert(tree_is_cleared(STM_PSEGMENT->modified_old_objects));
assert(list_is_empty(STM_PSEGMENT->objects_pointing_to_nursery));
assert(tree_is_cleared(STM_PSEGMENT->young_outside_nursery));
+ assert(tree_is_cleared(STM_PSEGMENT->nursery_objects_shadows));
check_nursery_at_transaction_start();
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -35,6 +35,7 @@
enum /* stm_flags */ {
GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
+ GCFLAG_HAS_SHADOW = 0x02,
};
@@ -52,6 +53,7 @@
struct tree_s *modified_old_objects;
struct list_s *objects_pointing_to_nursery;
struct tree_s *young_outside_nursery;
+ struct tree_s *nursery_objects_shadows;
uint8_t privatization_lock;
diff --git a/c8/stm/hash_id.c b/c8/stm/hash_id.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/hash_id.c
@@ -0,0 +1,69 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+static long mangle_hash(long i)
+{
+ /* To hash pointers in dictionaries. Assumes that i shows some
+ alignment (to 8, 16, maybe 32 bytes), so we use the following
+ formula to avoid the trailing bits being always 0.
+
+ Takes the address-like value 'i' and returns it XORed with a copy
+ of itself shifted right by 5 bits, so that bits 5 and above are
+ folded into the low bits of the result. In this file it is only
+ called from id_or_identityhash(), and only when a hash (not an
+ id) was requested.
+
+ NOTE(review): '>>' on a negative signed long is
+ implementation-defined in C; presumably 'i' is never negative
+ here -- confirm. */
+ return i ^ (i >> 5);
+}
+
+static long id_or_identityhash(object_t *obj, bool is_hash)
+{ /* Common implementation behind stm_id() (is_hash == false) and
+ stm_identityhash() (is_hash == true).
+
+ - obj == NULL: the value is the NULL pointer converted to long
+ (then passed through mangle_hash() if is_hash).
+ - obj still in the nursery: 'obj' is replaced by its shadow
+ (found or allocated by find_shadow()), so the value is derived
+ from the shadow's address rather than the nursery address.
+ - obj outside the nursery, is_hash set and GCFLAG_HAS_SHADOW set:
+ returns the word stored right after the object, NOT mangled.
+ - otherwise: the object's address, passed through mangle_hash()
+ only when is_hash is true. */
+ long result;
+
+ if (obj != NULL) {
+ if (_is_in_nursery(obj)) {
+ obj = find_shadow(obj); /* from here on, work with the shadow's address */
+ }
+ else if (is_hash) {
+ if (obj->stm_flags & GCFLAG_HAS_SHADOW) {
+
+ /* For identityhash(), we need a special case for some
+ prebuilt objects: their hash must be the same before
+ and after translation. It is stored as an extra word
+ after the object. But we cannot use it for id()
+ because the stored value might clash with a real one.
+ */
+ struct object_s *realobj = (struct object_s *)
+ REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ size_t size = stmcb_size_rounded_up(realobj);
+ result = *(long *)(((char *)realobj) + size); /* word located 'size' bytes past the object's start */
+ /* Important: the returned value is not mangle_hash()ed! */
+ return result;
+ }
+ }
+ }
+
+ result = (long)(uintptr_t)obj;
+ if (is_hash) {
+ result = mangle_hash(result);
+ }
+ return result;
+}
+
+long stm_id(object_t *obj)
+{ /* Returns the id of 'obj' by calling id_or_identityhash() with
+ is_hash == false, so the value is never passed through
+ mangle_hash() and never taken from a stored prebuilt hash. */
+ return id_or_identityhash(obj, false);
+}
+
+long stm_identityhash(object_t *obj)
+{ /* Returns the identity hash of 'obj' by calling
+ id_or_identityhash() with is_hash == true: either the word
+ stored after a non-nursery object flagged GCFLAG_HAS_SHADOW, or
+ the mangle_hash()ed address (of the shadow, for a nursery
+ object). */
+ return id_or_identityhash(obj, true);
+}
+
+void stm_set_prebuilt_identityhash(object_t *obj, long hash)
+{ /* Records 'hash' as the identity hash of 'obj': sets
+ GCFLAG_HAS_SHADOW on the object and writes 'hash' into the word
+ located stmcb_size_rounded_up() bytes past the object's start.
+ id_or_identityhash() returns that word, unmangled, for
+ non-nursery objects carrying the flag when a hash is requested.
+
+ Asserts that the object's flags are exactly GCFLAG_WRITE_BARRIER
+ beforehand and that the extra word is still 0.
+
+ NOTE(review): the object is written through stm_object_pages,
+ while id_or_identityhash() reads through
+ STM_SEGMENT->segment_base -- confirm both views agree for the
+ objects this is used on.
+ NOTE(review): the cdef added to c8/test/support.py in this same
+ changeset declares 'hash' as uint64_t, whereas this definition
+ takes long -- confirm which is intended. */
+ struct object_s *realobj = (struct object_s *)
+ REAL_ADDRESS(stm_object_pages, obj);
+
+ assert(realobj->stm_flags == GCFLAG_WRITE_BARRIER);
+ realobj->stm_flags |= GCFLAG_HAS_SHADOW;
+
+ size_t size = stmcb_size_rounded_up(realobj);
+ assert(*(long *)(((char *)realobj) + size) == 0);
+ *(long *)(((char *)realobj) + size) = hash;
+}
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -45,6 +45,7 @@
/************************************************************/
+static object_t *find_existing_shadow(object_t *obj);
#define GCWORD_MOVED ((object_t *) -1)
static void minor_trace_if_young(object_t **pobj)
@@ -65,9 +66,23 @@
where the object moved to, is stored in the second word in 'obj'. */
object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)obj;
- if (LIKELY(pforwarded_array[0] == GCWORD_MOVED)) {
- *pobj = pforwarded_array[1]; /* already moved */
- return;
+ if (obj->stm_flags & GCFLAG_HAS_SHADOW) {
+ /* ^^ the single check above detects both already-moved objects
+ and objects with HAS_SHADOW. This is because GCWORD_MOVED
+ completely overwrites the stm_flags field with 1 bits. */
+
+ if (LIKELY(pforwarded_array[0] == GCWORD_MOVED)) {
+ *pobj = pforwarded_array[1]; /* already moved */
+ return;
+ }
+ else {
+ /* really has a shadow */
+ nobj = find_existing_shadow(obj);
+ obj->stm_flags &= ~GCFLAG_HAS_SHADOW;
+ realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ size = stmcb_size_rounded_up((struct object_s *)realobj);
+ goto copy_large_object;
+ }
}
realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
@@ -77,6 +92,7 @@
char *allocated = allocate_outside_nursery_large(size);
nobj = (object_t *)(allocated - stm_object_pages);
+ copy_large_object:;
char *realnobj = REAL_ADDRESS(STM_SEGMENT->segment_base, nobj);
memcpy(realnobj, realobj, size);
@@ -206,6 +222,7 @@
tree_clear(pseg->young_outside_nursery);
}
+ tree_clear(pseg->nursery_objects_shadows);
return nursery_used;
#pragma pop_macro("STM_SEGMENT")
@@ -330,3 +347,62 @@
STM_SEGMENT->nursery_current),
NURSERY_END - _stm_nursery_start);
}
+
+
+static object_t *allocate_shadow(object_t *obj)
+{ /* Allocates a shadow for 'obj': a block of the same rounded-up
+ size obtained from allocate_outside_nursery_large(), filled with
+ a copy of the object's current contents. Sets
+ GCFLAG_HAS_SHADOW on 'obj', records the obj -> shadow pair in
+ STM_PSEGMENT->nursery_objects_shadows, and returns the shadow.
+ The only caller visible here is find_shadow(), which uses it for
+ objects that do not have the flag yet. */
+ char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ size_t size = stmcb_size_rounded_up((struct object_s *)realobj);
+
+ /* always gets outside as a large object for now */
+ char *allocated = allocate_outside_nursery_large(size);
+ object_t *nobj = (object_t *)(allocated - stm_object_pages);
+
+ /* Initialize the shadow enough to be considered a valid gc object.
+ If the original object stays alive at the next minor collection,
+ it will anyway be copied over the shadow and overwrite the
+ following fields. But if the object dies, then the shadow will
+ stay around and only be freed at the next major collection, at
+ which point we want it to look valid (but ready to be freed).
+
+ Here, in the general case, it requires copying the whole object.
+ It could be more optimized in special cases like in PyPy, by
+ copying only the typeid and (for var-sized objects) the length
+ field. It's probably overkill to add a special stmcb_xxx
+ interface just for that.
+ */
+ char *realnobj = REAL_ADDRESS(STM_SEGMENT->segment_base, nobj);
+ memcpy(realnobj, realobj, size);
+
+ obj->stm_flags |= GCFLAG_HAS_SHADOW;
+
+ tree_insert(STM_PSEGMENT->nursery_objects_shadows,
+ (uintptr_t)obj, (uintptr_t)nobj); /* key: the object, value: its shadow */
+ return nobj;
+}
+
+static object_t *find_existing_shadow(object_t *obj)
+{ /* Looks 'obj' up in STM_PSEGMENT->nursery_objects_shadows and
+ returns the shadow that was registered for it. If no entry is
+ found, control jumps to 'not_found' and the inconsistency is
+ reported through stm_fatalerror().
+
+ NOTE(review): nothing is returned after stm_fatalerror();
+ presumably it never returns -- confirm. */
+ wlog_t *item;
+
+ TREE_FIND(STM_PSEGMENT->nursery_objects_shadows,
+ (uintptr_t)obj, item, goto not_found);
+
+ /* The answer is the address of the shadow. */
+ return (object_t *)item->val;
+
+ not_found:
+ stm_fatalerror("GCFLAG_HAS_SHADOW but no shadow found");
+}
+
+static object_t *find_shadow(object_t *obj)
+{
+ /* The object 'obj' is still in the nursery. Find or allocate a
+ "shadow" object, which is where the object will be moved by the
+ next minor collection
+
+ GCFLAG_HAS_SHADOW on 'obj' (set by allocate_shadow()) tells
+ whether a shadow already exists: if so it is looked up with
+ find_existing_shadow(), otherwise a new one is allocated.
+ Returns the shadow in both cases.
+ */
+ if (obj->stm_flags & GCFLAG_HAS_SHADOW)
+ return find_existing_shadow(obj);
+ else
+ return allocate_shadow(obj);
+}
diff --git a/c8/stm/pages.c b/c8/stm/pages.c
--- a/c8/stm/pages.c
+++ b/c8/stm/pages.c
@@ -69,7 +69,6 @@
}
for (i = 0; i < NB_SEGMENTS; i++) {
- uint64_t bitmask = 1UL << i;
uintptr_t amount = count;
while (amount-->0) {
volatile struct page_shared_s *ps2 = (volatile struct page_shared_s *)
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -111,6 +111,7 @@
pr->modified_old_objects = tree_create();
pr->objects_pointing_to_nursery = list_create();
pr->young_outside_nursery = tree_create();
+ pr->nursery_objects_shadows = tree_create();
pr->last_commit_log_entry = &commit_log_root;
pr->pub.transaction_read_version = 0xff;
}
@@ -143,6 +144,7 @@
list_free(pr->objects_pointing_to_nursery);
tree_free(pr->modified_old_objects);
tree_free(pr->young_outside_nursery);
+ tree_free(pr->nursery_objects_shadows);
}
munmap(stm_object_pages, TOTAL_MEMORY);
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -22,4 +22,5 @@
#include "stm/setup.c"
#include "stm/fprintcolor.c"
#include "stm/rewind_setjmp.c"
+#include "stm/hash_id.c"
#include "stm/misc.c"
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -71,6 +71,11 @@
void _stm_set_nursery_free_count(uint64_t free_count);
+long stm_identityhash(object_t *obj);
+long stm_id(object_t *obj);
+void stm_set_prebuilt_identityhash(object_t *obj, uint64_t hash);
+
+
long _stm_count_modified_old_objects(void);
long _stm_count_objects_pointing_to_nursery(void);
object_t *_stm_enum_modified_old_objects(long index);
diff --git a/c8/test/test_hash_id.py b/c8/test/test_hash_id.py
new file mode 100644
--- /dev/null
+++ b/c8/test/test_hash_id.py
@@ -0,0 +1,74 @@
+from support import *
+from test_prebuilt import prebuilt
+import py
+
+class TestHashId(BaseTest):  # tests for lib.stm_id(), lib.stm_identityhash() and lib.stm_set_prebuilt_identityhash()
+
+ def test_hash_old_object(self):  # four objects from stm_allocate_old() must get four distinct identity hashes
+ lp1 = stm_allocate_old(16)
+ lp2 = stm_allocate_old(16)
+ lp3 = stm_allocate_old(16)
+ lp4 = stm_allocate_old(16)
+ self.start_transaction()
+ h1 = lib.stm_identityhash(lp1)
+ h2 = lib.stm_identityhash(lp2)
+ h3 = lib.stm_identityhash(lp3)
+ h4 = lib.stm_identityhash(lp4)
+ assert len(set([h1, h2, h3, h4])) == 4 # guaranteed by the algo
+
+ def test_id_old_object(self):  # stm_id() of an old object equals its pointer value cast to long
+ lp1 = stm_allocate_old(16)
+ self.start_transaction()
+ h1 = lib.stm_id(lp1)
+ assert h1 == int(ffi.cast("long", lp1))
+
+ def test_set_prebuilt_identityhash(self):  # an explicitly set hash is returned as-is; stm_id() is unaffected
+ static1 = prebuilt(16)
+ static2 = prebuilt(16)
+ lp1 = lib.stm_setup_prebuilt(static1)
+ lp2 = lib.stm_setup_prebuilt(static2)
+ lib.stm_set_prebuilt_identityhash(lp1, 42)  # only lp1 gets an explicit hash
+ self.start_transaction()
+ h1 = lib.stm_identityhash(lp1)
+ h2 = lib.stm_identityhash(lp2)
+ assert h1 == 42
+ assert h2 != 0
+ h1 = lib.stm_id(lp1)
+ h2 = lib.stm_id(lp2)
+ assert h1 == int(ffi.cast("long", lp1))  # the id stays the address even for the object with a stored hash
+ assert h2 == int(ffi.cast("long", lp2))
+
+ def test_hash_nursery(self):  # four objects from stm_allocate() inside a transaction must get distinct hashes
+ self.start_transaction()
+ lp1 = stm_allocate(16)
+ lp2 = stm_allocate(16)
+ lp3 = stm_allocate(16)
+ lp4 = stm_allocate(16)
+ h1 = lib.stm_identityhash(lp1)
+ h2 = lib.stm_identityhash(lp2)
+ h3 = lib.stm_identityhash(lp3)
+ h4 = lib.stm_identityhash(lp4)
+ assert len(set([h1, h2, h3, h4])) == 4 # guaranteed by the algo
+
+ def test_hash_lower_bits(self):  # the low 4 bits of the hashes of four 32-byte objects must all differ
+ self.start_transaction()
+ lp1 = stm_allocate(32)
+ lp2 = stm_allocate(32)
+ lp3 = stm_allocate(32)
+ lp4 = stm_allocate(32)
+ h1 = lib.stm_identityhash(lp1)
+ h2 = lib.stm_identityhash(lp2)
+ h3 = lib.stm_identityhash(lp3)
+ h4 = lib.stm_identityhash(lp4)
+ assert len(set([h1 & 15, h2 & 15, h3 & 15, h4 & 15])) == 4
+
+ def test_hash_around_minor_collect(self):  # the identity hash must be the same before and after a minor collection
+ self.start_transaction()
+ lp = stm_allocate(16)
+ h1 = lib.stm_identityhash(lp)
+ self.push_root(lp)  # keep the object reachable across the collection
+ stm_minor_collect()
+ lp = self.pop_root()  # re-read the reference after the collection
+ h2 = lib.stm_identityhash(lp)
+ assert h2 == h1
+ assert h2 != lib.stm_id(lp)  # the hash is expected to differ from the raw id
More information about the pypy-commit
mailing list