[pypy-commit] pypy stmgc-c8-gcc: fix for stmdict.get_items_w() and similar

Tue Aug 25 16:53:02 CEST 2015

Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: stmgc-c8-gcc
Changeset: r79226:861d6cdfd881
Date: 2015-08-25 16:55 +0200
http://bitbucket.org/pypy/pypy/changeset/861d6cdfd881/

Log:	fix for stmdict.get_items_w() and similar

	Import a new stmgc version that changes the interface of
	stm_hashtable_list to accept a GC array. Before, we used a raw-array
	that at some point contained GC references, which is not good if a
	minor GC occurs and we read from it later.

diff --git a/pypy/module/pypystm/hashtable.py b/pypy/module/pypystm/hashtable.py
--- a/pypy/module/pypystm/hashtable.py
+++ b/pypy/module/pypystm/hashtable.py
@@ -61,30 +61,21 @@
 
     def keys_w(self, space):
         array, count = self.h.list()
-        try:
-            lst = [intmask(array[i].index) for i in range(count)]
-        finally:
-            self.h.freelist(array)
+        lst = [intmask(array[i].index) for i in range(count)]
         return space.newlist_int(lst)
 
     def values_w(self, space):
         array, count = self.h.list()
-        try:
-            lst_w = [cast_gcref_to_instance(W_Root, array[i].object)
-                     for i in range(count)]
-        finally:
-            self.h.freelist(array)
+        lst_w = [cast_gcref_to_instance(W_Root, array[i].object)
+                 for i in range(count)]
         return space.newlist(lst_w)
 
     def items_w(self, space):
         array, count = self.h.list()
-        try:
-            lst_w = [space.newtuple([
-                         space.wrap(intmask(array[i].index)),
-                         cast_gcref_to_instance(W_Root, array[i].object)])
-                     for i in range(count)]
-        finally:
-            self.h.freelist(array)
+        lst_w = [space.newtuple([
+            space.wrap(intmask(array[i].index)),
+            cast_gcref_to_instance(W_Root, array[i].object)])
+                 for i in range(count)]
         return space.newlist(lst_w)
 
 
diff --git a/pypy/module/pypystm/stmdict.py b/pypy/module/pypystm/stmdict.py
--- a/pypy/module/pypystm/stmdict.py
+++ b/pypy/module/pypystm/stmdict.py
@@ -157,49 +157,40 @@
 
     def get_length(self):
         array, count = self.h.list()
-        try:
-            total_length_times_two = 0
-            for i in range(count):
-                subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
-                assert subarray
-                total_length_times_two += len(subarray)
-        finally:
-            self.h.freelist(array)
+        total_length_times_two = 0
+        for i in range(count):
+            subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
+            assert subarray
+            total_length_times_two += len(subarray)
         return total_length_times_two >> 1
 
     def get_keys_values_w(self, offset):
         array, count = self.h.list()
-        try:
-            result_list_w = []
-            for i in range(count):
-                subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
-                assert subarray
-                j = offset
-                limit = len(subarray)
-                while j < limit:
-                    w_item = cast_gcref_to_instance(W_Root, subarray[j])
-                    result_list_w.append(w_item)
-                    j += 2
-        finally:
-            self.h.freelist(array)
+        result_list_w = []
+        for i in range(count):
+            subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
+            assert subarray
+            j = offset
+            limit = len(subarray)
+            while j < limit:
+                w_item = cast_gcref_to_instance(W_Root, subarray[j])
+                result_list_w.append(w_item)
+                j += 2
         return result_list_w
 
     def get_items_w(self, space):
         array, count = self.h.list()
-        try:
-            result_list_w = []
-            for i in range(count):
-                subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
-                assert subarray
-                j = 0
-                limit = len(subarray)
-                while j < limit:
-                    w_key = cast_gcref_to_instance(W_Root, subarray[j])
-                    w_value = cast_gcref_to_instance(W_Root, subarray[j + 1])
-                    result_list_w.append(space.newtuple([w_key, w_value]))
-                    j += 2
-        finally:
-            self.h.freelist(array)
+        result_list_w = []
+        for i in range(count):
+            subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
+            assert subarray
+            j = 0
+            limit = len(subarray)
+            while j < limit:
+                w_key = cast_gcref_to_instance(W_Root, subarray[j])
+                w_value = cast_gcref_to_instance(W_Root, subarray[j + 1])
+                result_list_w.append(space.newtuple([w_key, w_value]))
+                j += 2
         return result_list_w
 
     def len_w(self, space):
diff --git a/pypy/module/pypystm/stmset.py b/pypy/module/pypystm/stmset.py
--- a/pypy/module/pypystm/stmset.py
+++ b/pypy/module/pypystm/stmset.py
@@ -96,28 +96,22 @@
 
     def get_length(self):
         array, count = self.h.list()
-        try:
-            total_length = 0
-            for i in range(count):
-                subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
-                assert subarray
-                total_length += len(subarray)
-        finally:
-            self.h.freelist(array)
+        total_length = 0
+        for i in range(count):
+            subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
+            assert subarray
+            total_length += len(subarray)
         return total_length
 
     def get_items_w(self):
         array, count = self.h.list()
-        try:
-            result_list_w = []
-            for i in range(count):
-                subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
-                assert subarray
-                for j in range(len(subarray)):
-                    w_item = cast_gcref_to_instance(W_Root, subarray[j])
-                    result_list_w.append(w_item)
-        finally:
-            self.h.freelist(array)
+        result_list_w = []
+        for i in range(count):
+            subarray = lltype.cast_opaque_ptr(PARRAY, array[i].object)
+            assert subarray
+            for j in range(len(subarray)):
+                w_item = cast_gcref_to_instance(W_Root, subarray[j])
+                result_list_w.append(w_item)
         return result_list_w
 
     def len_w(self, space):
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -214,7 +214,7 @@
                                        ('index', lltype.Unsigned),
                                        ('object', llmemory.GCREF))
 _STM_HASHTABLE_ENTRY_P = lltype.Ptr(_STM_HASHTABLE_ENTRY)
-_STM_HASHTABLE_ENTRY_ARRAY = rffi.CArray(_STM_HASHTABLE_ENTRY_P)
+_STM_HASHTABLE_ENTRY_ARRAY = lltype.GcArray(_STM_HASHTABLE_ENTRY_P)
 
 @dont_look_inside
 def _ll_hashtable_get(h, key):
@@ -234,17 +234,14 @@
 def _ll_hashtable_list(h):
     upper_bound = llop.stm_hashtable_length_upper_bound(lltype.Signed,
                                                         h.ll_raw_hashtable)
-    array = lltype.malloc(_STM_HASHTABLE_ENTRY_ARRAY, upper_bound,
-                          flavor='raw')
+    array = lltype.malloc(_STM_HASHTABLE_ENTRY_ARRAY, upper_bound)
+    # 'array' is newly allocated, thus we don't need to do a manual
+    # write_barrier for stm as requested by stm_hashtable_list
     count = llop.stm_hashtable_list(lltype.Signed, h, h.ll_raw_hashtable,
-                                    array)
+                                    lltype.direct_arrayitems(array))
     return (array, count)
 
 @dont_look_inside
-def _ll_hashtable_freelist(h, array):
-    lltype.free(array, flavor='raw')
-
- at dont_look_inside
 def _ll_hashtable_lookup(h, key):
     return llop.stm_hashtable_lookup(_STM_HASHTABLE_ENTRY_P,
                                      h, h.ll_raw_hashtable, key)
@@ -261,7 +258,6 @@
                                            'set': _ll_hashtable_set,
                                            'len': _ll_hashtable_len,
                                           'list': _ll_hashtable_list,
-                                      'freelist': _ll_hashtable_freelist,
                                         'lookup': _ll_hashtable_lookup,
                                       'writeobj': _ll_hashtable_writeobj})
 NULL_HASHTABLE = lltype.nullptr(_HASHTABLE_OBJ)
@@ -332,9 +328,6 @@
             items.append("additional garbage for testing")
         return items, count
 
-    def freelist(self, array):
-        pass
-
     def lookup(self, key):
         assert type(key) is int
         return EntryObjectForTest(self, key)
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -360,7 +360,7 @@
     arg2 = funcgen.expr(op.args[2])
     result = funcgen.expr(op.result)
     return ('%s = stm_hashtable_list((object_t *)%s, %s, '
-            '(stm_hashtable_entry_t **)%s);' % (result, arg0, arg1, arg2))
+            '(stm_hashtable_entry_t * TLPREFIX*)(%s));' % (result, arg0, arg1, arg2))
 
 def stm_hashtable_tracefn(funcgen, op):
     arg0 = funcgen.expr(op.args[0])
diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-6666c6fd1aad
+a3cb98b78053
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -271,7 +271,7 @@
 /* ############# commit log ############# */
 
 
-void _dbg_print_commit_log()
+void _dbg_print_commit_log(void)
 {
     struct stm_commit_log_entry_s *cl = &commit_log_root;
 
diff --git a/rpython/translator/stm/src_stm/stm/finalizer.c b/rpython/translator/stm/src_stm/stm/finalizer.c
--- a/rpython/translator/stm/src_stm/stm/finalizer.c
+++ b/rpython/translator/stm/src_stm/stm/finalizer.c
@@ -309,7 +309,7 @@
 {
     assert(_finalization_state(obj) == 1);
     /* The call will add GCFLAG_VISITED recursively, thus bump state 1->2 */
-    mark_visit_possibly_new_object(obj, pseg);
+    mark_visit_possibly_overflow_object(obj, pseg);
 }
 
 static struct list_s *mark_finalize_step1(
@@ -431,7 +431,7 @@
     if (f != NULL && f->run_finalizers != NULL) {
         LIST_FOREACH_R(f->run_finalizers, object_t * /*item*/,
                        ({
-                           mark_visit_possibly_new_object(item, pseg);
+                           mark_visit_possibly_overflow_object(item, pseg);
                        }));
     }
 }
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -341,7 +341,7 @@
 }
 
 
-static void mark_visit_possibly_new_object(object_t *obj, struct stm_priv_segment_info_s *pseg)
+static void mark_visit_possibly_overflow_object(object_t *obj, struct stm_priv_segment_info_s *pseg)
 {
     /* if newly allocated object, we trace in segment_base, otherwise in
        the sharing seg0 */
@@ -464,7 +464,7 @@
         for (; modified < end; modified++) {
             if (modified->type == TYPE_POSITION_MARKER &&
                     modified->type2 != TYPE_MODIFIED_HASHTABLE)
-                mark_visit_possibly_new_object(modified->marker_object, pseg);
+                mark_visit_possibly_overflow_object(modified->marker_object, pseg);
         }
     }
 }
@@ -503,11 +503,11 @@
         struct stm_shadowentry_s *base = tl->shadowstack_base;
         while (current-- != base) {
             if ((((uintptr_t)current->ss) & 3) == 0) {
-                mark_visit_possibly_new_object(current->ss, pseg);
+                mark_visit_possibly_overflow_object(current->ss, pseg);
             }
         }
 
-        mark_visit_possibly_new_object(tl->thread_local_obj, pseg);
+        mark_visit_possibly_overflow_object(tl->thread_local_obj, pseg);
 
         tl = tl->next;
     } while (tl != stm_all_thread_locals);
@@ -517,7 +517,7 @@
     assert(get_priv_segment(0)->transaction_state == TS_NONE);
     for (i = 1; i < NB_SEGMENTS; i++) {
         if (get_priv_segment(i)->transaction_state != TS_NONE) {
-            mark_visit_possibly_new_object(
+            mark_visit_possibly_overflow_object(
                 get_priv_segment(i)->threadlocal_at_start_of_transaction,
                 get_priv_segment(i));
 
diff --git a/rpython/translator/stm/src_stm/stm/hashtable.c b/rpython/translator/stm/src_stm/stm/hashtable.c
--- a/rpython/translator/stm/src_stm/stm/hashtable.c
+++ b/rpython/translator/stm/src_stm/stm/hashtable.c
@@ -325,6 +325,9 @@
                 stm_allocate_preexisting(sizeof(stm_hashtable_entry_t),
                                          (char *)&initial.header);
             hashtable->additions++;
+            /* make sure .object is NULL in all segments before
+               "publishing" the entry in the hashtable: */
+            write_fence();
         }
         table->items[i] = entry;
         write_fence();     /* make sure 'table->items' is written here */
@@ -422,7 +425,7 @@
 }
 
 long stm_hashtable_list(object_t *hobj, stm_hashtable_t *hashtable,
-                        stm_hashtable_entry_t **results)
+                        stm_hashtable_entry_t * TLPREFIX *results)
 {
     /* Set the read marker.  It will be left as long as we're running
        the same transaction.
diff --git a/rpython/translator/stm/src_stm/stm/pages.c b/rpython/translator/stm/src_stm/stm/pages.c
--- a/rpython/translator/stm/src_stm/stm/pages.c
+++ b/rpython/translator/stm/src_stm/stm/pages.c
@@ -28,7 +28,7 @@
     uint64_t ta = __sync_add_and_fetch(&pages_ctl.total_allocated,
                                        add_or_remove);
 
-    if (ta >= pages_ctl.total_allocated_bound)
+    if (UNLIKELY(ta >= pages_ctl.total_allocated_bound))
         pages_ctl.major_collection_requested = true;
 
     return ta;
diff --git a/rpython/translator/stm/src_stm/stm/queue.c b/rpython/translator/stm/src_stm/stm/queue.c
--- a/rpython/translator/stm/src_stm/stm/queue.c
+++ b/rpython/translator/stm/src_stm/stm/queue.c
@@ -437,7 +437,7 @@
             queue_entry_t *entry = seg->added_in_this_transaction;
 
             while (entry != NULL) {
-                mark_visit_possibly_new_object(entry->object, pseg);
+                mark_visit_possibly_overflow_object(entry->object, pseg);
                 entry = entry->next;
             }
         } TREE_LOOP_END;
diff --git a/rpython/translator/stm/src_stm/stm/smallmalloc.c b/rpython/translator/stm/src_stm/stm/smallmalloc.c
--- a/rpython/translator/stm/src_stm/stm/smallmalloc.c
+++ b/rpython/translator/stm/src_stm/stm/smallmalloc.c
@@ -8,9 +8,9 @@
 
 typedef struct {
     uint8_t sz;
-} fpsz_t;
+} full_page_size_t;
 
-static fpsz_t full_pages_object_size[PAGE_SMSIZE_END - PAGE_SMSIZE_START];
+static full_page_size_t full_pages_object_size[PAGE_SMSIZE_END - PAGE_SMSIZE_START];
 /* ^^^ This array contains the size (in number of words) of the objects
    in the given page, provided it's a "full page of small objects".  It
    is 0 if it's not such a page, if it's fully free, or if it's in
@@ -19,7 +19,7 @@
    technically full yet, it will be very soon in this case).
 */
 
-static fpsz_t *get_fpsz(char *smallpage)
+static full_page_size_t *get_full_page_size(char *smallpage)
 {
     uintptr_t pagenum = (((char *)smallpage) - END_NURSERY_PAGE * 4096UL - stm_object_pages) / 4096;
     /* <= PAGE_SMSIZE_END because we may ask for it when there is no
@@ -118,7 +118,7 @@
 
         /* Succeeded: we have a page in 'smallpage' */
         *fl = smallpage->next;
-        get_fpsz((char *)smallpage)->sz = n;
+        get_full_page_size((char *)smallpage)->sz = n;
         return (char *)smallpage;
     }
 
@@ -126,12 +126,15 @@
        Maybe we can pick one from free_uniform_pages.
      */
     smallpage = free_uniform_pages;
-    if (smallpage != NULL) {
+    if (LIKELY(smallpage != NULL)) {
         if (UNLIKELY(!__sync_bool_compare_and_swap(&free_uniform_pages,
                                                    smallpage,
                                                    smallpage->nextpage)))
             goto retry;
 
+        /* got a new page: */
+        increment_total_allocated(4096);
+
         /* Succeeded: we have a page in 'smallpage', which is not
            initialized so far, apart from the 'nextpage' field read
            above.  Initialize it.
@@ -153,7 +156,7 @@
         *previous = NULL;
 
         /* The first slot is immediately returned */
-        get_fpsz((char *)smallpage)->sz = n;
+        get_full_page_size((char *)smallpage)->sz = n;
         return (char *)smallpage;
     }
 
@@ -174,8 +177,6 @@
 
     struct small_free_loc_s *result = *fl;
 
-    increment_total_allocated(size);
-
     if (UNLIKELY(result == NULL)) {
         char *addr = _allocate_small_slowpath(size);
         ((struct object_s*)addr)->stm_flags = 0;
@@ -270,7 +271,6 @@
         }
         else if (!_smallmalloc_sweep_keep(p)) {
             /* the location should be freed now */
-            increment_total_allocated(-szword*8);
 #ifdef STM_TESTS
             /* fill location with 0xdd in all segs except seg0 */
             int j;
@@ -300,6 +300,7 @@
             any_object_remaining = true;
         }
     }
+
     if (!any_object_remaining) {
         /* give page back to free_uniform_pages and thus make it
            inaccessible from all other segments again (except seg0) */
@@ -311,9 +312,14 @@
 
         ((struct small_free_loc_s *)baseptr)->nextpage = free_uniform_pages;
         free_uniform_pages = (struct small_free_loc_s *)baseptr;
+
+        /* gave the page back */
+        increment_total_allocated(-4096);
     }
     else if (!any_object_dying) {
-        get_fpsz(baseptr)->sz = szword;
+        /* this is still a full page. only in this case we set the
+           full_page_size again: */
+        get_full_page_size(baseptr)->sz = szword;
     }
     else {
         check_order_inside_small_page(page_free);
@@ -339,9 +345,9 @@
             if (*fl != NULL) {
                 /* the entry in full_pages_object_size[] should already be
                    szword.  We reset it to 0. */
-                fpsz_t *fpsz = get_fpsz((char *)*fl);
-                assert(fpsz->sz == szword);
-                fpsz->sz = 0;
+                full_page_size_t *full_page_size = get_full_page_size((char *)*fl);
+                assert(full_page_size->sz == szword);
+                full_page_size->sz = 0;
                 sweep_small_page(getbaseptr(*fl), *fl, szword);
                 *fl = NULL;
             }
@@ -351,7 +357,7 @@
         while (page != NULL) {
             /* for every page in small_page_lists: assert that the
                corresponding full_pages_object_size[] entry is 0 */
-            assert(get_fpsz((char *)page)->sz == 0);
+            assert(get_full_page_size((char *)page)->sz == 0);
             nextpage = page->nextpage;
             sweep_small_page(getbaseptr(page), page, szword);
             page = nextpage;
@@ -361,10 +367,10 @@
     /* process the really full pages, which are the ones which still
        have a non-zero full_pages_object_size[] entry */
     char *pageptr = uninitialized_page_stop;
-    fpsz_t *fpsz_start = get_fpsz(pageptr);
-    fpsz_t *fpsz_end = &full_pages_object_size[PAGE_SMSIZE_END -
-                                               PAGE_SMSIZE_START];
-    fpsz_t *fpsz;
+    full_page_size_t *fpsz_start = get_full_page_size(pageptr);
+    full_page_size_t *fpsz_end = &full_pages_object_size[PAGE_SMSIZE_END -
+                                                         PAGE_SMSIZE_START];
+    full_page_size_t *fpsz;
     for (fpsz = fpsz_start; fpsz < fpsz_end; fpsz++, pageptr += 4096) {
         uint8_t sz = fpsz->sz;
         if (sz != 0) {
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -733,8 +733,13 @@
 void stm_hashtable_write_entry(object_t *hobj, stm_hashtable_entry_t *entry,
                                object_t *nvalue);
 long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+
+/* WARNING: stm_hashtable_list does not do a stm_write() on the 'results'
+   argument. 'results' may point inside an object. So if 'results' may be
+   a part of an old obj (which may have survived a minor GC), then make
+   sure to call stm_write() on the obj before calling this function. */
 long stm_hashtable_list(object_t *, stm_hashtable_t *,
-                        stm_hashtable_entry_t **results);
+                        stm_hashtable_entry_t * TLPREFIX *results);
 extern uint32_t stm_hashtable_entry_userdata;
 void stm_hashtable_tracefn(struct object_s *, stm_hashtable_t *,
                            void (object_t **));