[pypy-commit] stmgc gc-small-uniform: Get started

arigo noreply at buildbot.pypy.org
Tue Mar 18 08:40:22 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: gc-small-uniform
Changeset: r1060:f1e60e1cb9cf
Date: 2014-03-17 11:19 +0100
http://bitbucket.org/pypy/stmgc/changeset/f1e60e1cb9cf/

Log:	Get started

diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -83,7 +83,7 @@
            (outside the nursery), then it fits into one page.  This is
            the common case. Otherwise, we need to compute it based on
            its location and size. */
-        if ((obj->stm_flags & GCFLAG_SMALL_UNIFORM) != 0) {
+        if (is_small_uniform(obj)) {
             page_privatize(first_page);
         }
         else {
@@ -272,16 +272,16 @@
     assert(_has_mutex_pages());
     assert(!_is_young(obj));
 
-    char *segment_base = get_segment_base(source_segment_num);
     uintptr_t start = (uintptr_t)obj;
     uintptr_t first_page = start / 4096UL;
-    struct object_s *realobj = (struct object_s *)
-        REAL_ADDRESS(segment_base, obj);
 
-    if (realobj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+    if (is_small_uniform(obj)) {
         abort();//XXX WRITE THE FAST CASE
     }
     else {
+        char *segment_base = get_segment_base(source_segment_num);
+        struct object_s *realobj = (struct object_s *)
+            REAL_ADDRESS(segment_base, obj);
         ssize_t obj_size = stmcb_size_rounded_up(realobj);
         assert(obj_size >= 16);
         uintptr_t end = start + obj_size;
@@ -334,7 +334,7 @@
     uintptr_t start = (uintptr_t)obj;
     uintptr_t first_page = start / 4096UL;
 
-    if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+    if (is_small_uniform(obj)) {
         abort();//XXX WRITE THE FAST CASE
     }
     else {
@@ -488,6 +488,9 @@
     /* update 'overflow_number' if needed */
     if (STM_PSEGMENT->overflow_number_has_been_used) {
         highest_overflow_number += GCFLAG_OVERFLOW_NUMBER_bit0;
+        /* Note that the overflow number can never be all 1 bits;
+           this prevents stm_flags from ever containing the value -1,
+           which might be confused with GCWORD_MOVED. */
         assert(highest_overflow_number !=        /* XXX else, overflow! */
                (uint32_t)-GCFLAG_OVERFLOW_NUMBER_bit0);
         STM_PSEGMENT->overflow_number = highest_overflow_number;
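
A note on the assert above: the overflow number occupies every
'stm_flags' bit above the flag bits, so if it ever reached the pattern
(uint32_t)-GCFLAG_OVERFLOW_NUMBER_bit0, an object with all lower flags
set would have stm_flags == (uint32_t)-1, the same word as
GCWORD_MOVED.  A minimal standalone model of the wrap check (the bit0
value is the one introduced by this commit, but the program itself is
illustrative only):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GCFLAG_OVERFLOW_NUMBER_bit0  0x4

    int main(void)
    {
        uint32_t highest_overflow_number = 0;
        /* each transaction that flushes overflow objects bumps the
           number by one unit of bit0 */
        for (int tx = 0; tx < 5; tx++) {
            highest_overflow_number += GCFLAG_OVERFLOW_NUMBER_bit0;
            /* the forbidden value: all overflow bits set, which would
               allow stm_flags to become (uint32_t)-1 */
            assert(highest_overflow_number !=
                   (uint32_t)-GCFLAG_OVERFLOW_NUMBER_bit0);
            printf("tx %d -> overflow number 0x%x\n", tx,
                   highest_overflow_number);
        }
        return 0;
    }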
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -41,18 +41,13 @@
     */
     GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER,
 
-    /* This flag is set by gcpage.c for all objects living in
-       uniformly-sized pages of small objects.
-    */
-    GCFLAG_SMALL_UNIFORM = 0x02,
-
     /* The following flag is set on nursery objects of which we asked
        the id or the identityhash.  It means that a space of the size of
        the object has already been allocated in the nonmovable part.
        The same flag is abused to mark prebuilt objects whose hash has
        been taken during translation and is statically recorded just
        after the object. */
-    GCFLAG_HAS_SHADOW = 0x04,
+    GCFLAG_HAS_SHADOW = 0x2,
 
     /* All remaining bits of the 32-bit 'stm_flags' field are taken by
        the "overflow number".  This is a number that identifies the
@@ -61,7 +56,7 @@
        current transaction that have been flushed out of the nursery,
        which occurs if the same transaction allocates too many objects.
     */
-    GCFLAG_OVERFLOW_NUMBER_bit0 = 0x8   /* must be last */
+    GCFLAG_OVERFLOW_NUMBER_bit0 = 0x4   /* must be last */
 };
 
 
@@ -157,6 +152,9 @@
 #ifndef NDEBUG
     pthread_t running_pthread;
 #endif
+
+    /* This is for smallmalloc.c */
+    struct small_malloc_data_s small_malloc_data;
 };
 
 enum /* safe_point */ {
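
For orientation, the removal of GCFLAG_SMALL_UNIFORM repacks
'stm_flags': only two flag bits remain below the overflow number,
which therefore gains one bit of range.  A hypothetical helper (not
part of the diff) showing what the new layout means:

    #include <stdint.h>

    #define GCFLAG_WRITE_BARRIER         0x1
    #define GCFLAG_HAS_SHADOW            0x2
    #define GCFLAG_OVERFLOW_NUMBER_bit0  0x4

    /* extract the overflow number by masking off the two remaining
       flag bits; everything above them is the number */
    static inline uint32_t overflow_number_of(uint32_t stm_flags)
    {
        return stm_flags & ~(GCFLAG_WRITE_BARRIER | GCFLAG_HAS_SHADOW);
    }

Small-uniform status is no longer a flag at all; it is derived from the
object's address (see is_small_uniform() in smallmalloc.h below).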
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -19,8 +19,6 @@
 
 static void teardown_gcpage(void)
 {
-    memset(small_alloc, 0, sizeof(small_alloc));
-    free_uniform_pages = NULL;
     LIST_FREE(testing_prebuilt_objs);
     if (tree_prebuilt_objs != NULL) {
         tree_free(tree_prebuilt_objs);
@@ -29,52 +27,11 @@
 }
 
 
-#define GCPAGE_NUM_PAGES   20
-
 static void setup_N_pages(char *pages_addr, uint64_t num)
 {
     pages_initialize_shared((pages_addr - stm_object_pages) / 4096UL, num);
 }
 
-static void grab_more_free_pages_for_small_allocations(void)
-{
-    /* grab N (= GCPAGE_NUM_PAGES) pages out of the top addresses */
-    uintptr_t decrease_by = GCPAGE_NUM_PAGES * 4096;
-    if (uninitialized_page_stop - uninitialized_page_start <= decrease_by)
-        goto out_of_memory;
-
-    uninitialized_page_stop -= decrease_by;
-
-    if (!_stm_largemalloc_resize_arena(uninitialized_page_stop -
-                                       uninitialized_page_start))
-        goto out_of_memory;
-
-    setup_N_pages(uninitialized_page_start, GCPAGE_NUM_PAGES);
-
-    char *p = uninitialized_page_start;
-    long i;
-    for (i = 0; i < 16; i++) {
-        *(char **)p = free_uniform_pages;
-        free_uniform_pages = p;
-    }
-    return;
-
- out_of_memory:
-    stm_fatalerror("out of memory!\n");   /* XXX */
-}
-
-static char *_allocate_small_slowpath(uint64_t size)
-{
-    /* not thread-safe!  Use only when holding the mutex */
-    assert(_has_mutex());
-
-    if (free_uniform_pages == NULL)
-        grab_more_free_pages_for_small_allocations();
-
-    abort();//...
-}
-
-
 static char *allocate_outside_nursery_large(uint64_t size)
 {
     /* thread-safe: use the lock of pages.c to prevent any remapping
@@ -195,14 +152,14 @@
 static uintptr_t object_last_page(object_t *obj)
 {
     uintptr_t lastbyte;
-    struct object_s *realobj =
-        (struct object_s *)REAL_ADDRESS(stm_object_pages, obj);
 
-    if (realobj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+    if (is_small_uniform(obj)) {
         lastbyte = (uintptr_t)obj;
     }
     else {
         /* get the size of the object */
+        struct object_s *realobj =
+            (struct object_s *)REAL_ADDRESS(stm_object_pages, obj);
         size_t obj_size = stmcb_size_rounded_up(realobj);
 
         /* that's the last byte within the object */
diff --git a/c7/stm/gcpage.h b/c7/stm/gcpage.h
--- a/c7/stm/gcpage.h
+++ b/c7/stm/gcpage.h
@@ -1,19 +1,12 @@
 
-/* Outside the nursery, we are taking from the highest addresses
-   complete pages, one at a time, which uniformly contain objects of
-   size "8 * N" for some N in range(2, GC_N_SMALL_REQUESTS).  We are
-   taking from the lowest addresses "large" objects, which are at least
-   288 bytes long, allocated by largemalloc.c.  The limit is the same
-   as used in PyPy's default GC.
-*/
-
-#define GC_N_SMALL_REQUESTS    36
-
-/* More parameters fished directly from PyPy's default GC
+/* Some parameters fished directly from PyPy's default GC
    XXX document me */
 #define GC_MIN                 (NB_NURSERY_PAGES * 4096 * 8)
 #define GC_MAJOR_COLLECT       1.82
 
+/* Granularity when grabbing more unused pages: take 50 at a time */
+#define GCPAGE_NUM_PAGES   50
+
 /* re-share pages after major collections (1 or 0) */
 #define RESHARE_PAGES 1
 
@@ -23,16 +16,6 @@
 static char *uninitialized_page_stop;
 
 
-struct small_alloc_s {
-    char *next_object;   /* the next address we will return, or NULL */
-    char *range_last;    /* if equal to next_object: next_object starts with
-                            a next pointer; if greater: last item of a
-                            contiguous range of unallocated objs */
-};
-
-static struct small_alloc_s small_alloc[GC_N_SMALL_REQUESTS];
-static char *free_uniform_pages;
-
 static void setup_gcpage(void);
 static void teardown_gcpage(void);
 static char *allocate_outside_nursery_large(uint64_t size);
@@ -40,29 +23,4 @@
 static void major_collection_if_requested(void);
 static void major_collection_now_at_safe_point(void);
 static bool largemalloc_keep_object_at(char *data);   /* for largemalloc.c */
-
-
-static char *_allocate_small_slowpath(uint64_t size);
-
-static inline char *allocate_outside_nursery_small(uint64_t size)
-{
-    uint64_t index = size / 8;
-    OPT_ASSERT(2 <= index);
-    OPT_ASSERT(index < GC_N_SMALL_REQUESTS);
-
-    char *result = small_alloc[index].next_object;
-    if (result == NULL)
-        return _allocate_small_slowpath(size);
-
-    char *following;
-    if (small_alloc[index].range_last == result) {
-        following = ((char **)result)[0];
-        small_alloc[index].range_last = ((char **)result)[1];
-    }
-    else {
-        following = result + size;
-    }
-    small_alloc[index].next_object = following;
-
-    return result;
-}
+static void setup_N_pages(char *pages_addr, uint64_t num);
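
The removed fast path above kept a (next_object, range_last) pair per
size class plus a global free list.  Its replacement in smallmalloc.c
below threads an intrusive singly-linked list through the free slots
themselves, held per segment, so the common path is a single load and
store.  A standalone sketch of that pop operation (simplified: no
TLPREFIX, no real segment state):

    #include <stddef.h>

    struct small_free_loc_s {
        struct small_free_loc_s *next;   /* stored inside the free slot */
    };

    static char *pop_free_slot(struct small_free_loc_s **fl)
    {
        struct small_free_loc_s *result = *fl;
        if (result == NULL)
            return NULL;        /* caller falls back to the slow path */
        *fl = result->next;     /* unlink the head of the intrusive list */
        return (char *)result;
    }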
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -111,6 +111,7 @@
     teardown_sync();
     teardown_gcpage();
     teardown_nursery();
+    teardown_smallmalloc();
     teardown_pages();
 }
 
diff --git a/c7/stm/smallmalloc.c b/c7/stm/smallmalloc.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/smallmalloc.c
@@ -0,0 +1,128 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+
+static void teardown_smallmalloc(void)
+{
+    memset(small_page_lists, 0, sizeof(small_page_lists));
+    assert(free_uniform_pages == NULL);
+    first_small_uniform_loc = (uintptr_t) -1;
+}
+
+static void grab_more_free_pages_for_small_allocations(void)
+{
+    /* Grab GCPAGE_NUM_PAGES pages out of the top addresses.  Use the
+       lock of pages.c to prevent any remapping from occurring under our
+       feet.
+    */
+    mutex_pages_lock();
+
+    if (free_uniform_pages == NULL) {
+
+        uintptr_t decrease_by = GCPAGE_NUM_PAGES * 4096;
+        if (uninitialized_page_stop - uninitialized_page_start < decrease_by)
+            goto out_of_memory;
+
+        uninitialized_page_stop -= decrease_by;
+        first_small_uniform_loc = (uintptr_t)uninitialized_page_stop;
+
+        char *base = stm_object_pages + END_NURSERY_PAGE * 4096UL;
+        if (!_stm_largemalloc_resize_arena(uninitialized_page_stop - base))
+            goto out_of_memory;
+
+        setup_N_pages(uninitialized_page_stop, GCPAGE_NUM_PAGES);
+
+        char *p = uninitialized_page_stop;
+        long i;
+        for (i = 0; i < GCPAGE_NUM_PAGES; i++) {
+            ((struct small_page_list_s *)p)->nextpage = free_uniform_pages;
+            free_uniform_pages = (struct small_page_list_s *)p;
+            p += 4096;
+        }
+    }
+
+    mutex_pages_unlock();
+    return;
+
+ out_of_memory:
+    stm_fatalerror("out of memory!\n");   /* XXX */
+}
+
+static char *_allocate_small_slowpath(uint64_t size)
+{
+    /* First try to grab the next page from the global 'small_page_lists'
+     */
+    long n = size / 8;
+    struct small_page_list_s *smallpage;
+    struct small_free_loc_s *TLPREFIX *fl =
+        &STM_PSEGMENT->small_malloc_data.loc_free[n];
+    assert(*fl == NULL);
+
+ retry:
+    smallpage = small_page_lists[n];
+    if (smallpage != NULL) {
+        if (UNLIKELY(!__sync_bool_compare_and_swap(&small_page_lists[n],
+                                                   smallpage,
+                                                   smallpage->nextpage)))
+            goto retry;
+
+        /* Succeeded: we have a page in 'smallpage' */
+        *fl = smallpage->header.next;
+        return (char *)smallpage;
+    }
+
+    /* There are no more pages waiting.  Maybe we can pick one from
+       free_uniform_pages.
+     */
+    smallpage = free_uniform_pages;
+    if (smallpage != NULL) {
+        if (UNLIKELY(!__sync_bool_compare_and_swap(&free_uniform_pages,
+                                                   smallpage,
+                                                   smallpage->nextpage)))
+            goto retry;
+
+        /* Succeeded: we have a page in 'smallpage', which is not
+           initialized so far, apart from the 'nextpage' field read
+           above.  Initialize it.
+        */
+        assert(!(((uintptr_t)smallpage) & 4095));
+        struct small_free_loc_s *p, *following = NULL;
+
+        /* Initialize all slots from the second one to the last one to
+           contain a chained list */
+        uintptr_t i = size;
+        while (i <= 4096 - size) {
+            p = (struct small_free_loc_s *)(((char *)smallpage) + i);
+            p->next = following;
+            following = p;
+            i += size;
+        }
+
+        /* The first slot is immediately returned */
+        *fl = following;
+        return (char *)smallpage;
+    }
+
+    /* Not a single free page left.  Grab some more free pages and retry. */
+    grab_more_free_pages_for_small_allocations();
+    goto retry;
+}
+
+__attribute__((always_inline))
+static inline char *allocate_outside_nursery_small(uint64_t size)
+{
+    OPT_ASSERT((size & 7) == 0);
+    OPT_ASSERT(16 <= size && size < 8 * GC_N_SMALL_REQUESTS);
+
+    struct small_free_loc_s *TLPREFIX *fl =
+        &STM_PSEGMENT->small_malloc_data.loc_free[size / 8];
+
+    struct small_free_loc_s *result = *fl;
+
+    if (UNLIKELY(result == NULL))
+        return _allocate_small_slowpath(size);
+
+    *fl = result->next;
+    return (char *)result;
+}
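
To see what the slow path's initialization loop does, here is a
standalone model of carving one fresh 4096-byte page into 'size'-byte
slots: the second to last slots are chained in reverse through their
first word, and the first slot is handed straight to the caller
(illustrative program, not the real GC page handling):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct small_free_loc_s { struct small_free_loc_s *next; };

    int main(void)
    {
        uint64_t size = 48;                      /* any multiple of 8 >= 16 */
        char *page = aligned_alloc(4096, 4096);  /* stands in for a GC page */

        struct small_free_loc_s *p, *following = NULL;
        uintptr_t i = size;
        while (i <= 4096 - size) {        /* same loop as in the diff */
            p = (struct small_free_loc_s *)(page + i);
            p->next = following;          /* chain in reverse order */
            following = p;
            i += size;
        }

        /* 'page' itself is the slot returned now; 'following' heads
           the list of the remaining free slots */
        int n = 0;
        for (p = following; p != NULL; p = p->next)
            n++;
        printf("returned 1 slot, %d more on the free list\n", n);
        free(page);
        return 0;
    }

Note also that both global lists (small_page_lists[n] and
free_uniform_pages) are popped with __sync_bool_compare_and_swap, so
several segments can refill their local 'loc_free' lists concurrently
without holding the mutex.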
diff --git a/c7/stm/smallmalloc.h b/c7/stm/smallmalloc.h
new file mode 100644
--- /dev/null
+++ b/c7/stm/smallmalloc.h
@@ -0,0 +1,65 @@
+
+/* Outside the nursery, we are taking from the highest addresses
+   complete pages, one at a time, which uniformly contain objects of
+   size "8 * N" for some N in range(2, GC_N_SMALL_REQUESTS).  We are
+   taking from the lowest addresses "large" objects, which are at least
+   288 bytes long, allocated by largemalloc.c.  The limit is the same
+   as used in PyPy's default GC.
+*/
+
+#define GC_N_SMALL_REQUESTS    36
+
+
+struct small_free_loc_s {
+    struct small_free_loc_s *next;
+};
+
+struct small_page_list_s {
+    /* A chained list of locations within the same page which are
+       free. */
+    struct small_free_loc_s header;
+
+    /* A chained list of all small pages containing objects of
+       a given small size, and that have at least one free object. */
+    struct small_page_list_s *nextpage;
+
+    /* This structure is only two words, so it always fits inside one
+       free slot inside the page. */
+};
+
+
+/* For every size from 16 bytes to 8*(GC_N_SMALL_REQUESTS-1), this is
+   a list of pages that contain objects of that size and have at least
+   one free location.  Additionally, the item 0 in the following list
+   is a chained list of fully-free pages (which can be reused for a
+   different size than the one they originally contained).
+*/
+static struct small_page_list_s *small_page_lists[GC_N_SMALL_REQUESTS];
+
+#define free_uniform_pages   (small_page_lists[0])
+
+
+/* For is_small_uniform(). */
+static uintptr_t first_small_uniform_loc = (uintptr_t) -1;
+
+
+/* This is a definition for 'STM_PSEGMENT->small_malloc_data'.  Each
+   segment grabs one page at a time from the global list, and then
+   requests for data are answered locally.
+*/
+struct small_malloc_data_s {
+    struct small_free_loc_s *loc_free[GC_N_SMALL_REQUESTS];
+};
+
+
+/* Functions
+ */
+static inline char *allocate_outside_nursery_small(uint64_t size)
+     __attribute__((always_inline));
+
+static char *_allocate_small_slowpath(uint64_t size);
+static void teardown_smallmalloc(void);
+
+static inline bool is_small_uniform(object_t *obj) {
+    return ((uintptr_t)obj) >= first_small_uniform_loc;
+}
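
The address comparison works because uniform pages are always carved
off the top of the arena, moving first_small_uniform_loc downward;
everything at or above that boundary is small-uniform.  A toy model
with made-up addresses:

    #include <assert.h>
    #include <stdint.h>

    static uintptr_t first_small_uniform_loc = (uintptr_t)-1;

    static int is_small_uniform_addr(uintptr_t addr)
    {
        return addr >= first_small_uniform_loc;
    }

    int main(void)
    {
        /* before any uniform page is grabbed, nothing qualifies */
        assert(!is_small_uniform_addr(0x7f0000000000));

        /* grabbing pages lowers the boundary; everything above it is
           now classified as small-uniform */
        first_small_uniform_loc = 0x7f0000100000;
        assert(is_small_uniform_addr(0x7f0000100008));
        assert(!is_small_uniform_addr(0x7f00000fffff));
        return 0;
    }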
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -2,6 +2,7 @@
 #include "stmgc.h"
 #include "stm/atomic.h"
 #include "stm/list.h"
+#include "stm/smallmalloc.h"
 #include "stm/core.h"
 #include "stm/pagecopy.h"
 #include "stm/pages.h"
@@ -21,6 +22,7 @@
 #include "stm/prebuilt.c"
 #include "stm/gcpage.c"
 #include "stm/largemalloc.c"
+#include "stm/smallmalloc.c"
 #include "stm/nursery.c"
 #include "stm/sync.c"
 #include "stm/setup.c"

