[Python-checkins] Add miss stats for specialized instructions. (GH-31108)

markshannon webhook-mailer at python.org
Fri Feb 4 04:56:50 EST 2022


https://github.com/python/cpython/commit/832876b99212999c063cbf41bdacc574da699190
commit: 832876b99212999c063cbf41bdacc574da699190
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2022-02-04T09:56:46Z
summary:

Add miss stats for specialized instructions. (GH-31108)

files:
M Python/ceval.c
M Python/specialize.c
M Tools/scripts/summarize_stats.py
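
In short: the specialized-instruction miss handlers in ceval.c now also bump the generic per-opcode miss counter, specialize.c marks which opcodes are adaptive ("specializable") when dumping stats, and summarize_stats.py reports a miss count and miss percentage for the specialized forms while the specializable base opcodes keep their existing specialization breakdown. A rough sketch of that reporting logic follows; the report_misses helper and the sample numbers are hypothetical, not part of this commit.

def report_misses(opcode_stats, opname):
    """Print miss counts for specialized (non-"specializable") opcodes.

    Mirrors the logic added to summarize_stats.py: base opcodes carry
    the "specializable" marker and keep their specialization stats,
    so only the specialized variants get a plain miss percentage.
    """
    for i, stat in enumerate(opcode_stats):
        count = stat.get("execution_count", 0)
        if count == 0 or "specializable" in stat:
            continue
        miss = stat.get("specialization.miss", 0)
        if miss:
            print(f"{opname[i]}: misses {miss} ({100*miss/count:0.1f}%)")

# Made-up sample data, for illustration only.
opname = ["LOAD_ATTR_INSTANCE_VALUE", "LOAD_ATTR"]
opcode_stats = [
    {"execution_count": 1000, "specialization.miss": 40},  # specialized form
    {"execution_count": 500, "specializable": 1},          # adaptive base opcode
]
report_misses(opcode_stats, opname)
# prints: LOAD_ATTR_INSTANCE_VALUE: misses 40 (4.0%)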

diff --git a/Python/ceval.c b/Python/ceval.c
index b4029d1081dcb..e6b5d3ae24237 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -5403,6 +5403,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 #define MISS_WITH_CACHE(opname) \
 opname ## _miss: \
     { \
+        STAT_INC(opcode, miss); \
         STAT_INC(opname, miss); \
         _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
         cache->counter--; \
diff --git a/Python/specialize.c b/Python/specialize.c
index 4070d6a6a0be4..b7ef478ee5590 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -38,6 +38,33 @@
  <instr N-1>
 */
 
+/* Map from opcode to adaptive opcode.
+  Values of zero are ignored. */
+static uint8_t adaptive_opcodes[256] = {
+    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
+    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
+    [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
+    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
+    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
+    [CALL] = CALL_ADAPTIVE,
+    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
+    [BINARY_OP] = BINARY_OP_ADAPTIVE,
+    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
+};
+
+/* The number of cache entries required for a "family" of instructions. */
+static uint8_t cache_requirements[256] = {
+    [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
+    [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
+    [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
+    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
+    [STORE_SUBSCR] = 0,
+    [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
+    [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
+    [BINARY_OP] = 1,  // _PyAdaptiveEntry
+    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
+};
+
 Py_ssize_t _Py_QuickenedCount = 0;
 #ifdef Py_STATS
 PyStats _py_stats = { 0 };
@@ -144,7 +171,14 @@ _Py_GetSpecializationStats(void) {
 static void
 print_spec_stats(FILE *out, OpcodeStats *stats)
 {
+    /* Mark some opcodes as specializable for stats,
+     * even though we don't specialize them yet. */
+    fprintf(out, "    opcode[%d].specializable : 1\n", FOR_ITER);
+    fprintf(out, "    opcode[%d].specializable : 1\n", UNPACK_SEQUENCE);
     for (int i = 0; i < 256; i++) {
+        if (adaptive_opcodes[i]) {
+            fprintf(out, "    opcode[%d].specializable : 1\n", i);
+        }
         PRINT_STAT(i, specialization.success);
         PRINT_STAT(i, specialization.failure);
         PRINT_STAT(i, specialization.hit);
@@ -266,33 +300,6 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
     return quickened[0].entry.zero.cache_count;
 }
 
-/* Map from opcode to adaptive opcode.
-  Values of zero are ignored. */
-static uint8_t adaptive_opcodes[256] = {
-    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
-    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
-    [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
-    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
-    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
-    [CALL] = CALL_ADAPTIVE,
-    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
-    [BINARY_OP] = BINARY_OP_ADAPTIVE,
-    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
-};
-
-/* The number of cache entries required for a "family" of instructions. */
-static uint8_t cache_requirements[256] = {
-    [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
-    [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
-    [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
-    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
-    [STORE_SUBSCR] = 0,
-    [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
-    [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
-    [BINARY_OP] = 1,  // _PyAdaptiveEntry
-    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
-};
-
 /* Return the oparg for the cache_offset and instruction index.
  *
  * If no cache is needed then return the original oparg.
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index f67a35a04a9dd..6d0020739a31f 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -22,11 +22,10 @@
             pass
     opname.append(name)
 
-
 TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
 
 def print_specialization_stats(name, family_stats):
-    if "specialization.failure" not in family_stats:
+    if "specializable" not in family_stats:
         return
     total = sum(family_stats.get(kind, 0) for kind in TOTAL)
     if total == 0:
@@ -87,13 +86,18 @@ def main():
     for i, opcode_stat in enumerate(opcode_stats):
         if "execution_count" in opcode_stat:
             count = opcode_stat['execution_count']
-            counts.append((count, opname[i]))
+            miss = 0
+            if "specializable" not in opcode_stat:
+                miss = opcode_stat.get("specialization.miss")
+            counts.append((count, opname[i], miss))
             total += count
     counts.sort(reverse=True)
     cummulative = 0
-    for (count, name) in counts:
+    for (count, name, miss) in counts:
         cummulative += count
         print(f"{name}: {count} {100*count/total:0.1f}% {100*cummulative/total:0.1f}%")
+        if miss:
+            print(f"    Misses: {miss} {100*miss/count:0.1f}%")
     print("Specialization stats:")
     for i, opcode_stat in enumerate(opcode_stats):
         name = opname[i]
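
On how these counters travel from the C runtime to the summary script: print_spec_stats() emits flat "    opcode[N].field : value" lines (including the new "specializable" marker), and summarize_stats.py folds them back into per-opcode dictionaries before printing the tables above. The actual parser is not part of this diff; the reader below is only a hypothetical illustration, assuming every opcode stat follows the same "key : value" shape as the specializable lines added here.

import re
from collections import defaultdict

# Matches lines such as "    opcode[106].specializable : 1"
# or "    opcode[106].specialization.hit : 1234".
LINE = re.compile(r"\s*opcode\[(\d+)\]\.(\S+)\s*:\s*(\d+)")

def load_opcode_stats(path):
    stats = defaultdict(dict)
    with open(path) as f:
        for line in f:
            m = LINE.match(line)
            if m:
                opcode, field, value = int(m[1]), m[2], int(m[3])
                stats[opcode][field] = value
    return stats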


