[pypy-commit] stmgc c7: re-introduce the alloc-pages for small-sized allocations (3x faster in simple duhton bench).
Remi Meier
noreply at buildbot.pypy.org
Mon Jan 27 13:54:59 CET 2014
Author: Remi Meier
Branch: c7
Changeset: r677:8e7c804b4bdc
Date: 2014-01-27 13:55 +0100
http://bitbucket.org/pypy/stmgc/changeset/8e7c804b4bdc/
Log: re-introduce the alloc-pages for small-sized allocations (3x faster
in simple duhton bench).
diff --git a/c7/core.c b/c7/core.c
--- a/c7/core.c
+++ b/c7/core.c
@@ -100,10 +100,15 @@
/* privatize if SHARED_PAGE */
uintptr_t pagenum2, pages;
- _stm_chunk_pages((struct object_s*)REAL_ADDRESS(get_thread_base(0), obj),
- &pagenum2, &pages);
- assert(pagenum == pagenum2);
- assert(pages == (stmcb_size(real_address(obj)) +4095) / 4096);
+ if (obj->stm_flags & GCFLAG_SMALL) {
+ pagenum2 = pagenum;
+ pages = 1;
+ } else {
+ _stm_chunk_pages((struct object_s*)REAL_ADDRESS(get_thread_base(0), obj),
+ &pagenum2, &pages);
+ assert(pagenum == pagenum2);
+ assert(pages == (stmcb_size(real_address(obj)) +4095) / 4096);
+ }
for (pagenum2 += pages - 1; pagenum2 >= pagenum; pagenum2--)
stm_pages_privatize(pagenum2);
diff --git a/c7/core.h b/c7/core.h
--- a/c7/core.h
+++ b/c7/core.h
@@ -37,6 +37,9 @@
/* only used during collections to mark an obj as moved out of the
generation it was in */
GCFLAG_MOVED = (1 << 2),
+ /* objects smaller than one page and even smaller than
+ LARGE_OBJECT_WORDS * 8 bytes */
+ GCFLAG_SMALL = (1 << 3),
};
@@ -47,6 +50,7 @@
typedef TLPREFIX struct _thread_local1_s _thread_local1_t;
typedef TLPREFIX struct object_s object_t;
+typedef TLPREFIX struct alloc_for_size_s alloc_for_size_t;
typedef TLPREFIX struct read_marker_s read_marker_t;
typedef TLPREFIX char localchar_t;
typedef void* jmpbufptr_t[5]; /* for use with __builtin_setjmp() */
@@ -79,6 +83,11 @@
uint8_t rm;
};
+struct alloc_for_size_s { /* allocation state for one small size class */
+ localchar_t *next; /* where the next allocation of this size class goes */
+ uint16_t start, stop; /* only the lower 16 bits of the 'start' and 'stop' addresses */
+ bool flag_partial_page; /* false on a freshly reserved page; set when a partly used page is carried over */
+};
struct _thread_local1_s {
jmpbufptr_t *jmpbufptr;
@@ -94,6 +103,7 @@
object_t **shadow_stack;
object_t **shadow_stack_base;
+ struct alloc_for_size_s alloc[LARGE_OBJECT_WORDS];
struct stm_list_s *uncommitted_objects;
localchar_t *nursery_current;
diff --git a/c7/largemalloc.c b/c7/largemalloc.c
--- a/c7/largemalloc.c
+++ b/c7/largemalloc.c
@@ -105,6 +105,9 @@
size_t _stm_data_size(struct object_s *data)
{
+ if (data->stm_flags & GCFLAG_SMALL)
+ return stmcb_size(data); /* XXX: inefficient */
+
mchunk_t *chunk = data2chunk((char*)data);
return chunk->size & ~FLAG_SORTED;
}
@@ -120,7 +123,13 @@
char *end = src + _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), obj));
uintptr_t pagenum, num;
struct object_s *t0_obj = (struct object_s*)REAL_ADDRESS(get_thread_base(0), _stm_tl_address(src));
- _stm_chunk_pages(t0_obj, &pagenum, &num);
+
+ if (obj->stm_flags & GCFLAG_SMALL) {
+ pagenum = (uintptr_t)obj / 4096UL;
+ num = 1;
+ } else {
+ _stm_chunk_pages(t0_obj, &pagenum, &num);
+ }
while (src < end) {
size_t to_copy = 4096UL - ((uintptr_t)src & 4095UL);
@@ -299,6 +308,8 @@
void stm_large_free(object_t *tldata)
{
+ assert(!(tldata->stm_flags & GCFLAG_SMALL));
+
while (__sync_lock_test_and_set(&alloc_lock, 1))
spin_loop();
diff --git a/c7/nursery.c b/c7/nursery.c
--- a/c7/nursery.c
+++ b/c7/nursery.c
@@ -44,6 +44,58 @@
return _stm_allocate_old(size); /* XXX */
}
+localchar_t *_stm_alloc_next_page(size_t size_class)
+{
+ /* Point the allocator of 'size_class' at a freshly reserved page and return that page's first slot. May return uninitialized pages. */
+
+ /* 'alloc->next' points to where the next allocation should go. The
+ present function is called instead when this next allocation is
+ equal to 'alloc->stop'. As we know that 'start', 'next' and
+ 'stop' are always nearby pointers, we play tricks and only store
+ the lower 16 bits of 'start' and 'stop', so that the three
+ variables plus some flags fit in 16 bytes.
+ */
+ uintptr_t page;
+ localchar_t *result;
+ alloc_for_size_t *alloc = &_STM_TL->alloc[size_class]; /* state kept in _STM_TL for this size class */
+ size_t size = size_class * 8; /* size classes are counted in 8-byte units */
+
+ /* reserve a fresh new page (XXX: from the end!) */
+ page = stm_pages_reserve(1);
+
+ result = (localchar_t *)(page * 4096UL); /* page number -> address of the page's first byte */
+ alloc->start = (uintptr_t)result; /* uint16_t field: keeps only the low 16 bits */
+ alloc->stop = alloc->start + (4096 / size) * size; /* end of the last whole 'size'-byte slot in the page, also truncated to 16 bits */
+ alloc->next = result + size; /* the first slot is handed to the caller, so 'next' starts at the second */
+ alloc->flag_partial_page = false;
+ return result;
+}
+
+object_t *stm_big_small_alloc_old(size_t size, bool *is_small)
+{
+ /* Allocate 'size' bytes from stm_large_malloc() or from the per-size-class page in _STM_TL->alloc; '*is_small' tells the caller which one was used. May return uninitialized objects. */
+ object_t *result;
+ size_t size_class = size / 8; /* size expressed in 8-byte units */
+ assert(size_class >= 2); /* i.e. size must be at least 16 bytes */
+
+ if (size_class >= LARGE_OBJECT_WORDS) {
+ result = stm_large_malloc(size);
+ *is_small = 0;
+ } else {
+ *is_small = 1;
+ alloc_for_size_t *alloc = &_STM_TL->alloc[size_class];
+
+ if ((uint16_t)((uintptr_t)alloc->next) == alloc->stop) { /* low 16 bits of 'next' reached 'stop': switch to a new page */
+ result = (object_t *)_stm_alloc_next_page(size_class);
+ } else {
+ result = (object_t *)alloc->next;
+ alloc->next += size; /* NOTE(review): advances by 'size', while the page layout uses size_class * 8; equal only if size is a multiple of 8 -- confirm */
+ }
+ }
+ return result;
+}
+
+
void trace_if_young(object_t **pobj)
{
@@ -62,7 +114,8 @@
/* move obj to somewhere else */
size_t size = stmcb_size(real_address(*pobj));
- object_t *moved = stm_large_malloc(size);
+ bool is_small;
+ object_t *moved = stm_big_small_alloc_old(size, &is_small);
memcpy((void*)real_address(moved),
(void*)real_address(*pobj),
@@ -70,6 +123,8 @@
/* object is not committed yet */
moved->stm_flags |= GCFLAG_NOT_COMMITTED;
+ if (is_small) /* means, not allocated by large-malloc */
+ moved->stm_flags |= GCFLAG_SMALL;
LIST_APPEND(_STM_TL->uncommitted_objects, moved);
(*pobj)->stm_flags |= GCFLAG_MOVED;
@@ -189,6 +244,21 @@
/* uncommitted objects */
push_uncommitted_to_other_threads();
stm_list_clear(_STM_TL->uncommitted_objects);
+
+ /* for small alloc classes, set the partial flag */
+ long j;
+ for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
+ alloc_for_size_t *alloc = &_STM_TL->alloc[j];
+ uint16_t start = alloc->start;
+ uint16_t cur = (uintptr_t)alloc->next;
+
+ if (start == cur)
+ continue; /* page full -> will be replaced automatically */
+
+ alloc->start = cur; /* next transaction has different 'start' to
+ reset in case of an abort */
+ alloc->flag_partial_page = 1;
+ }
}
void nursery_on_abort()
@@ -205,13 +275,26 @@
_STM_TL->nursery_current = nursery_base;
+ /* reset the alloc-pages to the state at the start of the transaction */
+ long j;
+ for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
+ alloc_for_size_t *alloc = &_STM_TL->alloc[j];
+ uint16_t num_allocated = ((uintptr_t)alloc->next) - alloc->start;
+
+ if (num_allocated) {
+ /* forget about all non-committed objects */
+ alloc->next -= num_allocated;
+ }
+ }
+
/* free uncommitted objects */
struct stm_list_s *uncommitted = _STM_TL->uncommitted_objects;
STM_LIST_FOREACH(
uncommitted,
({
- stm_large_free(item);
+ if (!(item->stm_flags & GCFLAG_SMALL))
+ stm_large_free(item);
}));
stm_list_clear(uncommitted);
More information about the pypy-commit
mailing list