[pypy-commit] pypy quad-color-gc: QCGC update

ntruessel pypy.commits at gmail.com
Thu Sep 15 17:10:55 EDT 2016


Author: Nicolas Truessel <ntruessel at njsm.de>
Branch: quad-color-gc
Changeset: r87127:a870d33012f1
Date: 2016-09-15 18:45 +0200
http://bitbucket.org/pypy/pypy/changeset/a870d33012f1/

Log:	QCGC update

diff --git a/rpython/translator/c/src/qcgc/config.h b/rpython/translator/c/src/qcgc/config.h
--- a/rpython/translator/c/src/qcgc/config.h
+++ b/rpython/translator/c/src/qcgc/config.h
@@ -10,8 +10,8 @@
  */
 #define EVENT_LOG 1							// Enable event log
 #define LOGFILE "./qcgc_events.log"			// Default logfile
-#define LOG_ALLOCATION 0					// Enable allocation log (warning:
-											// significant performance impact)
+#define LOG_ALLOCATION 1					// Enable allocation log
+#define LOG_DUMP_FREELIST_STATS 1			// Dump freelist stats
 
 #define QCGC_SHADOWSTACK_SIZE 163840		// Total shadowstack size
 #define QCGC_ARENA_BAG_INIT_SIZE 16			// Initial size of the arena bag
@@ -31,7 +31,7 @@
 /**
  * Auto Mark/Collect
  */
-#define QCGC_INCMARK_THRESHOLD (1<<QCGC_ARENA_SIZE_EXP)
+#define QCGC_INCMARK_THRESHOLD (1<<(QCGC_ARENA_SIZE_EXP-4))
 #define QCGC_INCMARK_TO_SWEEP 5
 
 /**
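
The threshold change is easier to judge with concrete numbers. A minimal sketch, assuming QCGC_ARENA_SIZE_EXP is 20 (1 MiB arenas; the real value is defined elsewhere in config.h and not part of this diff):

    #include <stdio.h>

    #define QCGC_ARENA_SIZE_EXP 20    /* assumed value, not from this diff */

    int main(void) {
        size_t old_threshold = (size_t) 1 << QCGC_ARENA_SIZE_EXP;        /* 1048576 cells */
        size_t new_threshold = (size_t) 1 << (QCGC_ARENA_SIZE_EXP - 4);  /*   65536 cells */
        /* Incremental marking now triggers 16x as often. */
        printf("%zu -> %zu\n", old_threshold, new_threshold);
        return 0;
    }
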
diff --git a/rpython/translator/c/src/qcgc/qcgc.c b/rpython/translator/c/src/qcgc/qcgc.c
--- a/rpython/translator/c/src/qcgc/qcgc.c
+++ b/rpython/translator/c/src/qcgc/qcgc.c
@@ -34,7 +34,8 @@
 	qcgc_state.gp_gray_stack = qcgc_gray_stack_create(16); // XXX
 	qcgc_state.gray_stack_size = 0;
 	qcgc_state.phase = GC_PAUSE;
-	qcgc_state.bytes_since_incmark = 0;
+	qcgc_state.cells_since_incmark = 0;
+	qcgc_state.cells_since_collect = 0;
 	qcgc_state.incmark_since_sweep = 0;
 	qcgc_state.free_cells = 0;
 	qcgc_state.largest_free_block = 0;
@@ -62,12 +63,13 @@
 
 object_t *qcgc_allocate(size_t size) {
 #if LOG_ALLOCATION
-	qcgc_event_logger_log(EVENT_ALLOCATE_START, sizeof(size_t),
-			(uint8_t *) &size);
+	size_t cells = bytes_to_cells(size);
+	qcgc_event_logger_log(EVENT_ALLOCATE, sizeof(size_t),
+			(uint8_t *) &cells);
 #endif
 	object_t *result;
 
-	if (UNLIKELY(qcgc_state.bytes_since_incmark >
+	if (UNLIKELY(qcgc_state.cells_since_incmark >
 				qcgc_state.incmark_threshold)) {
 		if (qcgc_state.incmark_since_sweep == qcgc_state.incmark_to_sweep) {
 			qcgc_collect();
@@ -75,13 +77,11 @@
 			qcgc_incmark();
 			qcgc_state.incmark_since_sweep++;
 		}
-		
 	}
 
 	if (LIKELY(size <= 1<<QCGC_LARGE_ALLOC_THRESHOLD_EXP)) {
 		// Use bump / fit allocator
-		//if (qcgc_allocator_state.use_bump_allocator) {
-		if (false) {
+		if (qcgc_allocator_state.use_bump_allocator) {
 			result = bump_allocate(size);
 		} else {
 			result = qcgc_fit_allocate(size);
@@ -91,19 +91,13 @@
 				result = bump_allocate(size);
 			}
 		}
+		qcgc_state.free_cells -= bytes_to_cells(size);
 	} else {
 		// Use huge block allocator
 		result = qcgc_large_allocate(size);
 	}
-
-	// XXX: Should we use cells instead of bytes?
-	qcgc_state.bytes_since_incmark += size;
-
-
-#if LOG_ALLOCATION
-	qcgc_event_logger_log(EVENT_ALLOCATE_DONE, sizeof(object_t *),
-			(uint8_t *) &result);
-#endif
+	qcgc_state.cells_since_incmark += bytes_to_cells(size);
+	qcgc_state.cells_since_collect += bytes_to_cells(size);
 	return result;
 }
 
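
qcgc_allocate() now accounts in cells rather than bytes, settling the removed "XXX: Should we use cells instead of bytes?" question. bytes_to_cells() itself is not part of this diff; a plausible sketch, assuming 16-byte cells and round-up division:

    #include <stddef.h>

    /* Hypothetical reconstruction of the helper used above; the real one
     * lives in the allocator and the 16-byte cell size is an assumption. */
    #define CELL_SIZE 16

    static size_t bytes_to_cells(size_t bytes) {
        return (bytes + CELL_SIZE - 1) / CELL_SIZE;  /* round up */
    }
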
diff --git a/rpython/translator/c/src/qcgc/src/allocator.c b/rpython/translator/c/src/qcgc/src/allocator.c
--- a/rpython/translator/c/src/qcgc/src/allocator.c
+++ b/rpython/translator/c/src/qcgc/src/allocator.c
@@ -2,9 +2,7 @@
 
 #include <assert.h>
 #include <stdbool.h>
-#include <stdlib.h>
-
-#include "hugeblocktable.h"
+#include "gc_state.h"
 
 QCGC_STATIC QCGC_INLINE void bump_allocator_assign(cell_t *ptr, size_t cells);
 
@@ -65,25 +63,6 @@
 	free(qcgc_allocator_state.free_arenas);
 }
 
-void qcgc_fit_allocator_add(cell_t *ptr, size_t cells) {
-#if CHECKED
-	assert(cells > 0);
-#endif
-	if (is_small(cells)) {
-		size_t index = small_index(cells);
-		qcgc_allocator_state.fit_state.small_free_list[index] =
-			qcgc_linear_free_list_add(
-					qcgc_allocator_state.fit_state.small_free_list[index],
-					ptr);
-	} else {
-		size_t index = large_index(cells);
-		qcgc_allocator_state.fit_state.large_free_list[index] =
-			qcgc_exp_free_list_add(
-					qcgc_allocator_state.fit_state.large_free_list[index],
-					(struct exp_free_list_item_s) {ptr, cells});
-	}
-}
-
 /*******************************************************************************
  * Bump Allocator                                                              *
  ******************************************************************************/
@@ -124,6 +103,8 @@
 	} else {
 		// Grab a new arena
 		arena_t *arena = qcgc_arena_create();
+		qcgc_state.free_cells += QCGC_ARENA_CELLS_COUNT -
+			QCGC_ARENA_FIRST_CELL_INDEX;
 		bump_allocator_assign(&(arena->cells[QCGC_ARENA_FIRST_CELL_INDEX]),
 				QCGC_ARENA_CELLS_COUNT - QCGC_ARENA_FIRST_CELL_INDEX);
 		qcgc_allocator_state.arenas =
@@ -163,6 +144,10 @@
 	qcgc_allocator_state.bump_state.remaining_cells = cells;
 }
 
+/*******************************************************************************
+ * Fit Allocator                                                               *
+ ******************************************************************************/
+
 object_t *qcgc_fit_allocate(size_t bytes) {
 #if CHECKED
 	//free_list_consistency_check();
@@ -192,23 +177,6 @@
 	return result;
 }
 
-/**
- * Constraints:
- * - Zero initialized
- * - Aligned to arena size
- * - Multiple of arena size
- * - No header, metadata stored in hash-map
- */
-object_t *qcgc_large_allocate(size_t bytes) {
-	object_t *result = aligned_alloc(QCGC_ARENA_SIZE, bytes);
-#if QCGC_INIT_ZERO
-	memset(result, 0, bytes);
-#endif
-	qcgc_hbtable_insert(result);
-	result->flags = QCGC_GRAY_FLAG;
-	return result;
-}
-
 void qcgc_fit_allocator_empty_lists(void) {
 	for (size_t i = 0; i < QCGC_SMALL_FREE_LISTS; i++) {
 		qcgc_allocator_state.fit_state.small_free_list[i]->count = 0;
@@ -219,6 +187,25 @@
 	}
 }
 
+void qcgc_fit_allocator_add(cell_t *ptr, size_t cells) {
+#if CHECKED
+	assert(cells > 0);
+#endif
+	if (is_small(cells)) {
+		size_t index = small_index(cells);
+		qcgc_allocator_state.fit_state.small_free_list[index] =
+			qcgc_linear_free_list_add(
+					qcgc_allocator_state.fit_state.small_free_list[index],
+					ptr);
+	} else {
+		size_t index = large_index(cells);
+		qcgc_allocator_state.fit_state.large_free_list[index] =
+			qcgc_exp_free_list_add(
+					qcgc_allocator_state.fit_state.large_free_list[index],
+					(struct exp_free_list_item_s) {ptr, cells});
+	}
+}
+
 QCGC_STATIC cell_t *fit_allocator_small_first_fit(size_t index, size_t cells) {
 #if CHECKED
 	assert(small_index_to_cells(index) >= cells);
@@ -321,18 +308,18 @@
 	return NULL;
 }
 
-QCGC_STATIC bool is_small(size_t cells) {
+QCGC_STATIC QCGC_INLINE bool is_small(size_t cells) {
 	return cells <= QCGC_SMALL_FREE_LISTS;
 }
 
-QCGC_STATIC size_t small_index(size_t cells) {
+QCGC_STATIC QCGC_INLINE size_t small_index(size_t cells) {
 #if CHECKED
 	assert(is_small(cells));
 #endif
 	return cells - 1;
 }
 
-QCGC_STATIC size_t large_index(size_t cells) {
+QCGC_STATIC QCGC_INLINE size_t large_index(size_t cells) {
 #if CHECKED
 	assert(!is_small(cells));
 #endif
@@ -341,10 +328,11 @@
 	cells = cells >> QCGC_LARGE_FREE_LIST_FIRST_EXP;
 
 	// calculates floor(log(cells))
-	return MIN((8 * sizeof(unsigned long)) - __builtin_clzl(cells) - 1, QCGC_LARGE_FREE_LISTS - 1);
+	return MIN((8 * sizeof(unsigned long)) - __builtin_clzl(cells) - 1,
+			QCGC_LARGE_FREE_LISTS - 1);
 }
 
-QCGC_STATIC size_t small_index_to_cells(size_t index) {
+QCGC_STATIC QCGC_INLINE size_t small_index_to_cells(size_t index) {
 #if CHECKED
 	assert(index < QCGC_SMALL_FREE_LISTS);
 #endif
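
The newly inlined large_index() packs floor(log2(cells)) into a single expression built on GCC's __builtin_clzl. Pulled out on its own, the idiom looks like this (a standalone sketch, not code from this changeset):

    #include <assert.h>
    #include <stddef.h>

    /* floor(log2(x)) for x > 0: bit width minus leading zeros minus one. */
    static size_t floor_log2(unsigned long x) {
        assert(x > 0);  /* __builtin_clzl(0) is undefined behavior */
        return (8 * sizeof(unsigned long)) - __builtin_clzl(x) - 1;
    }

    /* floor_log2(1) == 0, floor_log2(8) == 3, floor_log2(9) == 3 */
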
diff --git a/rpython/translator/c/src/qcgc/src/allocator.h b/rpython/translator/c/src/qcgc/src/allocator.h
--- a/rpython/translator/c/src/qcgc/src/allocator.h
+++ b/rpython/translator/c/src/qcgc/src/allocator.h
@@ -3,9 +3,11 @@
 #include "../qcgc.h"
 
 #include <string.h>
+#include <stdlib.h>
 
 #include "arena.h"
 #include "bag.h"
+#include "hugeblocktable.h"
 
 /**
  * Free lists:
@@ -71,15 +73,6 @@
 object_t *qcgc_fit_allocate(size_t bytes);
 
 /**
- * Allocate new memory region using huge block allocator
- *
- * @param	bytes	Desired size of the memory region in bytes
- * @return	Pointer to memory large enough to hold size bytes, NULL in case of
- *			errors, already zero initialized if QCGC_INIT_ZERO is set
- */
-object_t *qcgc_large_allocate(size_t bytes);
-
-/**
  * Empty all free lists (used before sweep)
  */
 void qcgc_fit_allocator_empty_lists(void);
@@ -131,3 +124,20 @@
 	result->flags = QCGC_GRAY_FLAG;
 	return result;
 }
+
+/**
+ * Allocate new memory region using huge block allocator
+ *
+ * @param	bytes	Desired size of the memory region in bytes
+ * @return	Pointer to memory large enough to hold size bytes, NULL in case of
+ *			errors, already zero initialized if QCGC_INIT_ZERO is set
+ */
+QCGC_STATIC QCGC_INLINE object_t *qcgc_large_allocate(size_t bytes) {
+	object_t *result = aligned_alloc(QCGC_ARENA_SIZE, bytes);
+#if QCGC_INIT_ZERO
+	memset(result, 0, bytes);
+#endif
+	qcgc_hbtable_insert(result);
+	result->flags = QCGC_GRAY_FLAG;
+	return result;
+}
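
Moving qcgc_large_allocate() here as a QCGC_STATIC QCGC_INLINE function is what forces allocator.h to include stdlib.h (for aligned_alloc) and hugeblocktable.h (for qcgc_hbtable_insert). The doc comment dropped from allocator.c listed "multiple of arena size" among the constraints, which matters because C11 aligned_alloc() requires the size to be a multiple of the alignment. A hedged sketch of the rounding a caller would need (this helper is an illustration, not part of qcgc):

    #include <stddef.h>

    /* Round a request up to a multiple of the arena size so the C11
     * aligned_alloc() contract holds; assumes arena_size is a power of two. */
    static size_t round_to_arena_multiple(size_t bytes, size_t arena_size) {
        return (bytes + arena_size - 1) & ~(arena_size - 1);
    }
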
diff --git a/rpython/translator/c/src/qcgc/src/collector.c b/rpython/translator/c/src/qcgc/src/collector.c
--- a/rpython/translator/c/src/qcgc/src/collector.c
+++ b/rpython/translator/c/src/qcgc/src/collector.c
@@ -14,7 +14,7 @@
 
 void qcgc_mark(void) {
 	mark_setup(false);
-	
+
 	while (qcgc_state.gray_stack_size > 0) {
 		// General purpose gray stack (prebuilt objects and huge blocks)
 
@@ -94,7 +94,7 @@
 				(uint8_t *) &log_info);
 	}
 
-	qcgc_state.bytes_since_incmark = 0;
+	qcgc_state.cells_since_incmark = 0;
 
 	if (qcgc_state.phase == GC_PAUSE) {
 
@@ -186,10 +186,16 @@
 	assert(qcgc_state.phase == GC_COLLECT);
 #endif
 	{
-		unsigned long arena_count;
-		arena_count = qcgc_allocator_state.arenas->count;
-		qcgc_event_logger_log(EVENT_SWEEP_START, sizeof(arena_count),
-				(uint8_t *) &arena_count);
+		struct log_info_s {
+			size_t arenas;
+			size_t free_cells;
+		};
+		struct log_info_s log_info = {
+			qcgc_allocator_state.arenas->count,
+			qcgc_state.free_cells,
+		};
+		qcgc_event_logger_log(EVENT_SWEEP_START, sizeof(struct log_info_s),
+				(uint8_t *) &log_info);
 	}
 
 	qcgc_hbtable_sweep();
@@ -218,22 +224,73 @@
 	// Determine whether fragmentation is too high
 	// Fragmentation = 1 - (largest block / total free space)
 	// Use bump allocator when fragmentation < 50%
+#if CHECKED
+	size_t free_cells = 0;
+	size_t largest_free_block = 0;
+	for (size_t i = 0; i < QCGC_SMALL_FREE_LISTS; i++) {
+		if (qcgc_allocator_state.fit_state.small_free_list[i]->count > 0) {
+			largest_free_block = i + 1;
+		}
+		free_cells += qcgc_allocator_state.fit_state.small_free_list[i]
+			->count * (i + 1);
+	}
+	for (size_t i = 0; i < QCGC_LARGE_FREE_LISTS; i++) {
+		for (size_t j = 0; j < qcgc_allocator_state.fit_state.
+				large_free_list[i]->count; j++) {
+			free_cells += qcgc_allocator_state.fit_state.large_free_list[i]
+				->items[j].size;
+			largest_free_block = MAX(largest_free_block,
+				qcgc_allocator_state.fit_state.large_free_list[i]
+				->items[j].size);
+		}
+	}
+	assert(free_cells == qcgc_state.free_cells);
+	assert(largest_free_block == qcgc_state.largest_free_block);
+	assert(free_cells <= qcgc_allocator_state.arenas->count *
+			(QCGC_ARENA_CELLS_COUNT - QCGC_ARENA_FIRST_CELL_INDEX));
+#endif
 	qcgc_allocator_state.use_bump_allocator = qcgc_state.free_cells <
 		2 * qcgc_state.largest_free_block;
 
 	update_weakrefs();
 
+	qcgc_state.free_cells += qcgc_allocator_state.bump_state.remaining_cells;
 	{
 		struct log_info_s {
+			size_t arenas;
 			size_t free_cells;
 			size_t largest_free_block;
 		};
 		struct log_info_s log_info = {
+			qcgc_allocator_state.arenas->count,
 			qcgc_state.free_cells,
 			qcgc_state.largest_free_block
 		};
 		qcgc_event_logger_log(EVENT_SWEEP_DONE, sizeof(struct log_info_s),
 				(uint8_t *) &log_info);
 	}
+#if LOG_DUMP_FREELIST_STATS
+	{
+		struct log_info_s {
+			size_t class;
+			size_t items;
+		};
+		struct log_info_s log_info;
+
+		for (size_t i = 0; i < QCGC_SMALL_FREE_LISTS; i++) {
+			log_info = (struct log_info_s){ i + 1,
+				qcgc_allocator_state.fit_state.small_free_list[i]->count};
+			qcgc_event_logger_log(EVENT_FREELIST_DUMP,
+					sizeof(struct log_info_s), (uint8_t *) &log_info);
+		}
+		for (size_t i = 0; i < QCGC_LARGE_FREE_LISTS; i++) {
+			log_info = (struct log_info_s){
+				1<<(QCGC_LARGE_FREE_LIST_FIRST_EXP + i),
+				qcgc_allocator_state.fit_state.large_free_list[i]->count};
+			qcgc_event_logger_log(EVENT_FREELIST_DUMP,
+					sizeof(struct log_info_s), (uint8_t *) &log_info);
+		}
+	}
+#endif
 }
 
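
The re-enabled bump-allocator switch is the 50% rule from the comment, rearranged to avoid a division: fragmentation < 0.5 means 1 - largest/total < 0.5, i.e. largest > total/2, i.e. total < 2 * largest. A small standalone check of the equivalence:

    #include <stdbool.h>
    #include <stdio.h>

    /* Same predicate as sweep(): bump-allocate while the largest free block
     * still covers more than half of all free cells. */
    static bool use_bump(size_t free_cells, size_t largest_free_block) {
        return free_cells < 2 * largest_free_block;
    }

    int main(void) {
        printf("%d\n", use_bump(1000, 600)); /* fragmentation 0.4 -> 1 (bump) */
        printf("%d\n", use_bump(1000, 400)); /* fragmentation 0.6 -> 0 (fit)  */
        return 0;
    }
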
diff --git a/rpython/translator/c/src/qcgc/src/event_logger.h b/rpython/translator/c/src/qcgc/src/event_logger.h
--- a/rpython/translator/c/src/qcgc/src/event_logger.h
+++ b/rpython/translator/c/src/qcgc/src/event_logger.h
@@ -15,13 +15,14 @@
 	EVENT_SWEEP_START,
 	EVENT_SWEEP_DONE,
 
-	EVENT_ALLOCATE_START,
-	EVENT_ALLOCATE_DONE,		// = 5
+	EVENT_ALLOCATE,
 
-	EVENT_NEW_ARENA,
+	EVENT_NEW_ARENA,			// = 5
 
 	EVENT_MARK_START,
 	EVENT_MARK_DONE,
+
+	EVENT_FREELIST_DUMP,
 };
 
 /**
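
Collapsing EVENT_ALLOCATE_START/EVENT_ALLOCATE_DONE into a single EVENT_ALLOCATE shifts every later enumerator down by one, so external log parsers keyed on numeric ids need updating. Inferred numbering after this change (the enumerators before EVENT_SWEEP_START are outside the hunk and assumed to occupy 0 and 1):

    enum {
        /* ... two earlier events at 0 and 1 ... */
        EVENT_SWEEP_START   = 2,
        EVENT_SWEEP_DONE    = 3,
        EVENT_ALLOCATE      = 4,
        EVENT_NEW_ARENA     = 5,   /* matches the "= 5" comment */
        EVENT_MARK_START    = 6,
        EVENT_MARK_DONE     = 7,
        EVENT_FREELIST_DUMP = 8,
    };
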
diff --git a/rpython/translator/c/src/qcgc/src/gc_state.h b/rpython/translator/c/src/qcgc/src/gc_state.h
--- a/rpython/translator/c/src/qcgc/src/gc_state.h
+++ b/rpython/translator/c/src/qcgc/src/gc_state.h
@@ -33,7 +33,8 @@
 	size_t gray_stack_size;
 	gc_phase_t phase;
 
-	size_t bytes_since_incmark;
+	size_t cells_since_incmark;
+	size_t cells_since_collect;
 	size_t incmark_since_sweep;
 	size_t incmark_threshold;
 	size_t incmark_to_sweep;

