[Python-checkins] Add some GC stats to Py_STATS (GH-107581)

markshannon webhook-mailer at python.org
Fri Aug 4 05:34:27 EDT 2023


https://github.com/python/cpython/commit/2ba7c7f7b151ff56cf12bf3cab286981bb646c90
commit: 2ba7c7f7b151ff56cf12bf3cab286981bb646c90
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2023-08-04T10:34:23+01:00
summary:

Add some GC stats to Py_STATS (GH-107581)

files:
M Include/internal/pycore_code.h
M Include/pystats.h
M Modules/gcmodule.c
M Python/specialize.c
M Tools/scripts/summarize_stats.py

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index ee1b85187cbab..00099376635e9 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -274,6 +274,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
 #define EVAL_CALL_STAT_INC(name) do { if (_py_stats) _py_stats->call_stats.eval_calls[name]++; } while (0)
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
     do { if (_py_stats && PyFunction_Check(callable)) _py_stats->call_stats.eval_calls[name]++; } while (0)
+#define GC_STAT_ADD(gen, name, n) do { if (_py_stats) _py_stats->gc_stats[(gen)].name += (n); } while (0)
 
 // Export for '_opcode' shared extension
 PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -287,6 +288,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define OBJECT_STAT_INC_COND(name, cond) ((void)0)
 #define EVAL_CALL_STAT_INC(name) ((void)0)
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
+#define GC_STAT_ADD(gen, name, n) ((void)0)
 #endif  // !Py_STATS
 
 // Utility functions for reading/writing 32/64-bit values in the inline caches.
diff --git a/Include/pystats.h b/Include/pystats.h
index 54c9b8d8b3538..e24aef5fe8072 100644
--- a/Include/pystats.h
+++ b/Include/pystats.h
@@ -74,12 +74,21 @@ typedef struct _object_stats {
     uint64_t optimization_traces_created;
     uint64_t optimization_traces_executed;
     uint64_t optimization_uops_executed;
+    /* Temporary value used during GC */
+    uint64_t object_visits;
 } ObjectStats;
 
+typedef struct _gc_stats {
+    uint64_t collections;
+    uint64_t object_visits;
+    uint64_t objects_collected;
+} GCStats;
+
 typedef struct _stats {
     OpcodeStats opcode_stats[256];
     CallStats call_stats;
     ObjectStats object_stats;
+    GCStats *gc_stats;
 } PyStats;
 
 
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 246c0a9e160aa..35a35091bf451 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -460,6 +460,7 @@ update_refs(PyGC_Head *containers)
 static int
 visit_decref(PyObject *op, void *parent)
 {
+    OBJECT_STAT_INC(object_visits);
     _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
 
     if (_PyObject_IS_GC(op)) {
@@ -498,6 +499,7 @@ subtract_refs(PyGC_Head *containers)
 static int
 visit_reachable(PyObject *op, PyGC_Head *reachable)
 {
+    OBJECT_STAT_INC(object_visits);
     if (!_PyObject_IS_GC(op)) {
         return 0;
     }
@@ -725,6 +727,7 @@ clear_unreachable_mask(PyGC_Head *unreachable)
 static int
 visit_move(PyObject *op, PyGC_Head *tolist)
 {
+    OBJECT_STAT_INC(object_visits);
     if (_PyObject_IS_GC(op)) {
         PyGC_Head *gc = AS_GC(op);
         if (gc_is_collecting(gc)) {
@@ -1195,6 +1198,12 @@ gc_collect_main(PyThreadState *tstate, int generation,
                 Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
                 int nofail)
 {
+    GC_STAT_ADD(generation, collections, 1);
+#ifdef Py_STATS
+    if (_py_stats) {
+        _py_stats->object_stats.object_visits = 0;
+    }
+#endif
     int i;
     Py_ssize_t m = 0; /* # objects collected */
     Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
@@ -1351,6 +1360,15 @@ gc_collect_main(PyThreadState *tstate, int generation,
     stats->collected += m;
     stats->uncollectable += n;
 
+    GC_STAT_ADD(generation, objects_collected, m);
+#ifdef Py_STATS
+    if (_py_stats) {
+        GC_STAT_ADD(generation, object_visits,
+            _py_stats->object_stats.object_visits);
+        _py_stats->object_stats.object_visits = 0;
+    }
+#endif
+
     if (PyDTrace_GC_DONE_ENABLED()) {
         PyDTrace_GC_DONE(n + m);
     }
diff --git a/Python/specialize.c b/Python/specialize.c
index 1669ce17fc804..de329ef1195cb 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -18,7 +18,8 @@
  */
 
 #ifdef Py_STATS
-PyStats _py_stats_struct = { 0 };
+GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 };
+PyStats _py_stats_struct = { .gc_stats = &_py_gc_stats[0] };
 PyStats *_py_stats = NULL;
 
 #define ADD_STAT_TO_DICT(res, field) \
@@ -202,17 +203,32 @@ print_object_stats(FILE *out, ObjectStats *stats)
     fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
 }
 
+static void
+print_gc_stats(FILE *out, GCStats *stats)
+{
+    for (int i = 0; i < NUM_GENERATIONS; i++) {
+        fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
+        fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
+        fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
+    }
+}
+
 static void
 print_stats(FILE *out, PyStats *stats) {
     print_spec_stats(out, stats->opcode_stats);
     print_call_stats(out, &stats->call_stats);
     print_object_stats(out, &stats->object_stats);
+    print_gc_stats(out, stats->gc_stats);
 }
 
 void
 _Py_StatsClear(void)
 {
+    for (int i = 0; i < NUM_GENERATIONS; i++) {
+        _py_gc_stats[i] = (GCStats) { 0 };
+    }
     _py_stats_struct = (PyStats) { 0 };
+    _py_stats_struct.gc_stats = _py_gc_stats;
 }
 
 void
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 9c881897c2de1..f798b2f772d08 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -494,6 +494,22 @@ def calculate_object_stats(stats):
             rows.append((label, value, ratio))
     return rows
 
+def calculate_gc_stats(stats):
+    gc_stats = []
+    for key, value in stats.items():
+        if not key.startswith("GC"):
+            continue
+        n, _, rest = key[3:].partition("]")
+        name = rest.strip()
+        gen_n = int(n)
+        while len(gc_stats) <= gen_n:
+            gc_stats.append({})
+        gc_stats[gen_n][name] = value
+    return [
+        (i, gen["collections"], gen["objects collected"], gen["object visits"])
+        for (i, gen) in enumerate(gc_stats)
+    ]
+
 def emit_object_stats(stats):
     with Section("Object stats", summary="allocations, frees and dict materializatons"):
         rows = calculate_object_stats(stats)
@@ -505,6 +521,22 @@ def emit_comparative_object_stats(base_stats, head_stats):
         head_rows = calculate_object_stats(head_stats)
         emit_table(("",  "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows))
 
+def emit_gc_stats(stats):
+    with Section("GC stats", summary="GC collections and effectiveness"):
+        rows = calculate_gc_stats(stats)
+        emit_table(("Generation:",  "Collections:", "Objects collected:", "Object visits:"), rows)
+
+def emit_comparative_gc_stats(base_stats, head_stats):
+    with Section("GC stats", summary="GC collections and effectiveness"):
+        base_rows = calculate_gc_stats(base_stats)
+        head_rows = calculate_gc_stats(head_stats)
+        emit_table(
+            ("Generation:",
+            "Base collections:", "Head collections:",
+            "Base objects collected:", "Head objects collected:",
+            "Base object visits:", "Head object visits:"),
+            join_rows(base_rows, head_rows))
+
 def get_total(opcode_stats):
     total = 0
     for opcode_stat in opcode_stats:
@@ -574,6 +606,7 @@ def output_single_stats(stats):
     emit_specialization_overview(opcode_stats, total)
     emit_call_stats(stats)
     emit_object_stats(stats)
+    emit_gc_stats(stats)
     with Section("Meta stats", summary="Meta statistics"):
         emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
 
@@ -596,6 +629,7 @@ def output_comparative_stats(base_stats, head_stats):
     )
     emit_comparative_call_stats(base_stats, head_stats)
     emit_comparative_object_stats(base_stats, head_stats)
+    emit_comparative_gc_stats(base_stats, head_stats)
 
 def output_stats(inputs, json_output=None):
     if len(inputs) == 1:



More information about the Python-checkins mailing list