[pypy-commit] stmgc default: add hash/id/shadow

Raemi noreply at buildbot.pypy.org
Tue Sep 9 11:33:13 CEST 2014


Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: 
Changeset: r1374:30234e0f06ae
Date: 2014-09-09 11:07 +0200
http://bitbucket.org/pypy/stmgc/changeset/30234e0f06ae/

Log:	add hash/id/shadow

diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -326,6 +326,7 @@
     assert(tree_is_cleared(STM_PSEGMENT->modified_old_objects));
     assert(list_is_empty(STM_PSEGMENT->objects_pointing_to_nursery));
     assert(tree_is_cleared(STM_PSEGMENT->young_outside_nursery));
+    assert(tree_is_cleared(STM_PSEGMENT->nursery_objects_shadows));
 
     check_nursery_at_transaction_start();
 
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -35,6 +35,7 @@
 
 enum /* stm_flags */ {
     GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
+    GCFLAG_HAS_SHADOW = 0x02,  /* nursery: has shadow; else: prebuilt hash */
 };
 
 
@@ -52,6 +53,7 @@
     struct tree_s *modified_old_objects;
     struct list_s *objects_pointing_to_nursery;
     struct tree_s *young_outside_nursery;
+    struct tree_s *nursery_objects_shadows;
 
     uint8_t privatization_lock;
 
diff --git a/c8/stm/hash_id.c b/c8/stm/hash_id.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/hash_id.c
@@ -0,0 +1,69 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+static long mangle_hash(long i)
+{
+    /* Hash for pointers used as dictionary keys.  'i' is assumed to be
+       aligned (to 8, 16, maybe 32 bytes), so its trailing bits are
+       always 0: XOR in 'i >> 5' to mix higher bits into them. */
+    return i ^ (i >> 5);
+}
+
+static long id_or_identityhash(object_t *obj, bool is_hash)
+{
+    long result;    /* shared worker for stm_id() and stm_identityhash() */
+
+    if (obj != NULL) {
+        if (_is_in_nursery(obj)) {
+            obj = find_shadow(obj);   /* derive the value from the shadow */
+        }
+        else if (is_hash) {
+            if (obj->stm_flags & GCFLAG_HAS_SHADOW) {
+
+                /* Outside the nursery, GCFLAG_HAS_SHADOW marks an object
+                   given a hash by stm_set_prebuilt_identityhash(): the
+                   hash of such prebuilt objects must be the same before
+                   and after translation, so it is stored as an extra
+                   word after the object.  It cannot be used for id():
+                   the stored value might clash with a real one. */
+                struct object_s *realobj = (struct object_s *)
+                    REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+                size_t size = stmcb_size_rounded_up(realobj);
+                result = *(long *)(((char *)realobj) + size);
+                /* Important: the returned value is not mangle_hash()ed! */
+                return result;
+            }
+        }
+    }
+
+    result = (long)(uintptr_t)obj;   /* a NULL 'obj' skips straight to here */
+    if (is_hash) {
+        result = mangle_hash(result);
+    }
+    return result;
+}
+
+long stm_id(object_t *obj)
+{
+    return id_or_identityhash(obj, false);   /* id(): never mangled */
+}
+
+long stm_identityhash(object_t *obj)
+{
+    return id_or_identityhash(obj, true);    /* identityhash() variant */
+}
+
+void stm_set_prebuilt_identityhash(object_t *obj, long hash)
+{
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(stm_object_pages, obj);
+    /* only accepted on objects whose sole flag is the write barrier */
+    assert(realobj->stm_flags == GCFLAG_WRITE_BARRIER);
+    realobj->stm_flags |= GCFLAG_HAS_SHADOW;
+    /* store 'hash' in the word right after the object; must still be 0 */
+    size_t size = stmcb_size_rounded_up(realobj);
+    assert(*(long *)(((char *)realobj) + size) == 0);
+    *(long *)(((char *)realobj) + size) = hash;
+}
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -45,6 +45,7 @@
 
 
 /************************************************************/
+static object_t *find_existing_shadow(object_t *obj);
 #define GCWORD_MOVED  ((object_t *) -1)
 
 static void minor_trace_if_young(object_t **pobj)
@@ -65,9 +66,23 @@
            where the object moved to, is stored in the second word in 'obj'. */
         object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)obj;
 
-        if (LIKELY(pforwarded_array[0] == GCWORD_MOVED)) {
-            *pobj = pforwarded_array[1];    /* already moved */
-            return;
+        if (obj->stm_flags & GCFLAG_HAS_SHADOW) {
+            /* ^^ the single check above detects both already-moved objects
+               and objects with HAS_SHADOW.  This is because GCWORD_MOVED
+               completely overwrites the stm_flags field with 1 bits. */
+
+            if (LIKELY(pforwarded_array[0] == GCWORD_MOVED)) {
+                *pobj = pforwarded_array[1];    /* already moved */
+                return;
+            }
+            else {
+                /* really has a shadow */
+                nobj = find_existing_shadow(obj);
+                obj->stm_flags &= ~GCFLAG_HAS_SHADOW;
+                realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+                size = stmcb_size_rounded_up((struct object_s *)realobj);
+                goto copy_large_object;
+            }
         }
 
         realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
@@ -77,6 +92,7 @@
         char *allocated = allocate_outside_nursery_large(size);
         nobj = (object_t *)(allocated - stm_object_pages);
 
+    copy_large_object:;
         char *realnobj = REAL_ADDRESS(STM_SEGMENT->segment_base, nobj);
         memcpy(realnobj, realobj, size);
 
@@ -206,6 +222,7 @@
         tree_clear(pseg->young_outside_nursery);
     }
 
+    tree_clear(pseg->nursery_objects_shadows);
 
     return nursery_used;
 #pragma pop_macro("STM_SEGMENT")
@@ -330,3 +347,62 @@
                                     STM_SEGMENT->nursery_current),
                        NURSERY_END - _stm_nursery_start);
 }
+
+
+static object_t *allocate_shadow(object_t *obj)
+{
+    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up((struct object_s *)realobj);
+    /* The shadow is a copy of 'obj' allocated outside the nursery;
+       it always gets outside as a large object for now */
+    char *allocated = allocate_outside_nursery_large(size);
+    object_t *nobj = (object_t *)(allocated - stm_object_pages);
+
+    /* Initialize the shadow enough to be considered a valid gc object.
+       If the original object stays alive at the next minor collection,
+       it will anyway be copied over the shadow and overwrite the
+       following fields.  But if the object dies, then the shadow will
+       stay around and only be freed at the next major collection, at
+       which point we want it to look valid (but ready to be freed).
+
+       Here, in the general case, it requires copying the whole object.
+       It could be more optimized in special cases like in PyPy, by
+       copying only the typeid and (for var-sized objects) the length
+       field.  It's probably overkill to add a special stmcb_xxx
+       interface just for that.
+    */
+    char *realnobj = REAL_ADDRESS(STM_SEGMENT->segment_base, nobj);
+    memcpy(realnobj, realobj, size);
+    /* flag 'obj' only now, after the copy above has been taken */
+    obj->stm_flags |= GCFLAG_HAS_SHADOW;
+    /* record obj -> shadow, the mapping find_existing_shadow() reads */
+    tree_insert(STM_PSEGMENT->nursery_objects_shadows,
+                (uintptr_t)obj, (uintptr_t)nobj);
+    return nobj;
+}
+
+static object_t *find_existing_shadow(object_t *obj)
+{
+    wlog_t *item;
+    /* look 'obj' up in the tree filled by allocate_shadow() */
+    TREE_FIND(STM_PSEGMENT->nursery_objects_shadows,
+              (uintptr_t)obj, item, goto not_found);
+
+    /* The answer is the address of the shadow. */
+    return (object_t *)item->val;
+    /* lookup failed; assumes stm_fatalerror() never returns (TODO confirm) */
+ not_found:
+    stm_fatalerror("GCFLAG_HAS_SHADOW but no shadow found");
+}
+
+static object_t *find_shadow(object_t *obj)
+{
+    /* The object 'obj' is still in the nursery.  Find or allocate a
+       "shadow" object, which is where the object will be moved by the
+       next minor collection.
+    */
+    if (obj->stm_flags & GCFLAG_HAS_SHADOW)
+        return find_existing_shadow(obj);   /* allocated by an earlier call */
+    else
+        return allocate_shadow(obj);        /* first request: create it */
+}
diff --git a/c8/stm/pages.c b/c8/stm/pages.c
--- a/c8/stm/pages.c
+++ b/c8/stm/pages.c
@@ -69,7 +69,6 @@
     }
 
     for (i = 0; i < NB_SEGMENTS; i++) {
-        uint64_t bitmask = 1UL << i;
         uintptr_t amount = count;
         while (amount-->0) {
             volatile struct page_shared_s *ps2 = (volatile struct page_shared_s *)
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -111,6 +111,7 @@
         pr->modified_old_objects = tree_create();
         pr->objects_pointing_to_nursery = list_create();
         pr->young_outside_nursery = tree_create();
+        pr->nursery_objects_shadows = tree_create();
         pr->last_commit_log_entry = &commit_log_root;
         pr->pub.transaction_read_version = 0xff;
     }
@@ -143,6 +144,7 @@
         list_free(pr->objects_pointing_to_nursery);
         tree_free(pr->modified_old_objects);
         tree_free(pr->young_outside_nursery);
+        tree_free(pr->nursery_objects_shadows);
     }
 
     munmap(stm_object_pages, TOTAL_MEMORY);
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -22,4 +22,5 @@
 #include "stm/setup.c"
 #include "stm/fprintcolor.c"
 #include "stm/rewind_setjmp.c"
+#include "stm/hash_id.c"
 #include "stm/misc.c"
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -71,6 +71,11 @@
 
 void _stm_set_nursery_free_count(uint64_t free_count);
 
+/* hash/id API; signatures mirror the definitions in c8/stm/hash_id.c */
+long stm_identityhash(object_t *obj);
+long stm_id(object_t *obj);
+void stm_set_prebuilt_identityhash(object_t *obj, long hash);
+
 long _stm_count_modified_old_objects(void);
 long _stm_count_objects_pointing_to_nursery(void);
 object_t *_stm_enum_modified_old_objects(long index);
diff --git a/c8/test/test_hash_id.py b/c8/test/test_hash_id.py
new file mode 100644
--- /dev/null
+++ b/c8/test/test_hash_id.py
@@ -0,0 +1,74 @@
+from support import *
+from test_prebuilt import prebuilt
+import py
+
+class TestHashId(BaseTest):   # tests for stm_id() / stm_identityhash()
+
+    def test_hash_old_object(self):
+        lp1 = stm_allocate_old(16)
+        lp2 = stm_allocate_old(16)
+        lp3 = stm_allocate_old(16)
+        lp4 = stm_allocate_old(16)
+        self.start_transaction()
+        h1 = lib.stm_identityhash(lp1)
+        h2 = lib.stm_identityhash(lp2)
+        h3 = lib.stm_identityhash(lp3)
+        h4 = lib.stm_identityhash(lp4)
+        assert len(set([h1, h2, h3, h4])) == 4     # guaranteed by the algo
+
+    def test_id_old_object(self):
+        lp1 = stm_allocate_old(16)
+        self.start_transaction()
+        h1 = lib.stm_id(lp1)
+        assert h1 == int(ffi.cast("long", lp1))    # id is the plain address
+
+    def test_set_prebuilt_identityhash(self):
+        static1 = prebuilt(16)
+        static2 = prebuilt(16)
+        lp1 = lib.stm_setup_prebuilt(static1)
+        lp2 = lib.stm_setup_prebuilt(static2)
+        lib.stm_set_prebuilt_identityhash(lp1, 42)  # lp2 gets no stored hash
+        self.start_transaction()
+        h1 = lib.stm_identityhash(lp1)
+        h2 = lib.stm_identityhash(lp2)
+        assert h1 == 42                            # stored hash returned as-is
+        assert h2 != 0
+        h1 = lib.stm_id(lp1)
+        h2 = lib.stm_id(lp2)
+        assert h1 == int(ffi.cast("long", lp1))    # id ignores the stored hash
+        assert h2 == int(ffi.cast("long", lp2))
+
+    def test_hash_nursery(self):
+        self.start_transaction()
+        lp1 = stm_allocate(16)
+        lp2 = stm_allocate(16)
+        lp3 = stm_allocate(16)
+        lp4 = stm_allocate(16)
+        h1 = lib.stm_identityhash(lp1)
+        h2 = lib.stm_identityhash(lp2)
+        h3 = lib.stm_identityhash(lp3)
+        h4 = lib.stm_identityhash(lp4)
+        assert len(set([h1, h2, h3, h4])) == 4     # guaranteed by the algo
+
+    def test_hash_lower_bits(self):
+        self.start_transaction()
+        lp1 = stm_allocate(32)
+        lp2 = stm_allocate(32)
+        lp3 = stm_allocate(32)
+        lp4 = stm_allocate(32)
+        h1 = lib.stm_identityhash(lp1)
+        h2 = lib.stm_identityhash(lp2)
+        h3 = lib.stm_identityhash(lp3)
+        h4 = lib.stm_identityhash(lp4)
+        assert len(set([h1 & 15, h2 & 15, h3 & 15, h4 & 15])) == 4  # low 4 bits
+
+    def test_hash_around_minor_collect(self):
+        self.start_transaction()
+        lp = stm_allocate(16)
+        h1 = lib.stm_identityhash(lp)
+        self.push_root(lp)
+        stm_minor_collect()
+        lp = self.pop_root()
+        h2 = lib.stm_identityhash(lp)
+        assert h2 == h1                 # hash is stable across the collection
+        assert h2 != lib.stm_id(lp)     # the hash is mangled, the id is not


More information about the pypy-commit mailing list