[Python-checkins] bpo-44337: Port LOAD_ATTR to PEP 659 adaptive interpreter (GH-26595)

markshannon webhook-mailer at python.org
Thu Jun 10 03:46:16 EDT 2021


https://github.com/python/cpython/commit/e117c0283705943189e6b1aef668a1f68f3f00a4
commit: e117c0283705943189e6b1aef668a1f68f3f00a4
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-06-10T08:46:01+01:00
summary:

bpo-44337: Port LOAD_ATTR to PEP 659 adaptive interpreter (GH-26595)

* Specialize LOAD_ATTR with LOAD_ATTR_SLOT and LOAD_ATTR_SPLIT_KEYS

* Move dict-common.h to internal/pycore_dict.h

* Add LOAD_ATTR_WITH_HINT specialized opcode.

* Quicken in function if loopy

* Specialize LOAD_ATTR for module attributes.

* Add specialization stats

files:
A Include/internal/pycore_dict.h
A Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst
D Objects/dict-common.h
M Include/internal/pycore_code.h
M Include/opcode.h
M Lib/opcode.py
M Lib/test/test_capi.py
M Makefile.pre.in
M Objects/dictobject.c
M Objects/odictobject.c
M Python/ceval.c
M Python/makeopcodetargets.py
M Python/opcode_targets.h
M Python/specialize.c
M Tools/scripts/generate_opcode_h.py

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 2709e082b05b17..05ba522969a3d0 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -42,6 +42,12 @@ typedef struct {
     uint16_t index;
 } _PyAdaptiveEntry;
 
+
+typedef struct {
+    uint32_t tp_version;
+    uint32_t dk_version_or_hint;
+} _PyLoadAttrCache;
+
 /* Add specialized versions of entries to this union.
  *
  * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -55,6 +61,7 @@ typedef struct {
 typedef union {
     _PyEntryZero zero;
     _PyAdaptiveEntry adaptive;
+    _PyLoadAttrCache load_attr;
 } SpecializedCacheEntry;
 
 #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
@@ -255,6 +262,83 @@ PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
 PyAPI_FUNC(PyObject *) _PyCode_GetFreevars(PyCodeObject *);
 
 
+/* Cache hits and misses */
+
+static inline uint8_t
+saturating_increment(uint8_t c)
+{
+    return c<<1;
+}
+
+static inline uint8_t
+saturating_decrement(uint8_t c)
+{
+    return (c>>1) + 128;
+}
+
+static inline uint8_t
+saturating_zero(void)
+{
+    return 255;
+}
+
+/* Starting value for saturating counter.
+ * Technically this should be 1, but that is likely to
+ * cause a bit of thrashing when we optimize then get an immediate miss.
+ * We want to give the counter a chance to stabilize, so we start at 3.
+ */
+static inline uint8_t
+saturating_start(void)
+{
+    return saturating_zero()<<3;
+}
+
+static inline void
+record_cache_hit(_PyAdaptiveEntry *entry) {
+    entry->counter = saturating_increment(entry->counter);
+}
+
+static inline void
+record_cache_miss(_PyAdaptiveEntry *entry) {
+    entry->counter = saturating_decrement(entry->counter);
+}
+
+static inline int
+too_many_cache_misses(_PyAdaptiveEntry *entry) {
+    return entry->counter == saturating_zero();
+}
+
+#define BACKOFF 64
+
+static inline void
+cache_backoff(_PyAdaptiveEntry *entry) {
+    entry->counter = BACKOFF;
+}
+
+/* Specialization functions */
+
+int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
+
+#define SPECIALIZATION_STATS 0
+#if SPECIALIZATION_STATS
+
+typedef struct _specialization_stats {
+    uint64_t specialization_success;
+    uint64_t specialization_failure;
+    uint64_t loadattr_hit;
+    uint64_t loadattr_deferred;
+    uint64_t loadattr_miss;
+    uint64_t loadattr_deopt;
+} SpecializationStats;
+
+extern SpecializationStats _specialization_stats;
+#define STAT_INC(name) _specialization_stats.name++
+void _Py_PrintSpecializationStats(void);
+#else
+#define STAT_INC(name) ((void)0)
+#endif
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Objects/dict-common.h b/Include/internal/pycore_dict.h
similarity index 66%
rename from Objects/dict-common.h
rename to Include/internal/pycore_dict.h
index a6f518f301885a..b2c64b2168cdcf 100644
--- a/Objects/dict-common.h
+++ b/Include/internal/pycore_dict.h
@@ -1,5 +1,14 @@
-#ifndef Py_DICT_COMMON_H
-#define Py_DICT_COMMON_H
+
+#ifndef Py_INTERNAL_DICT_H
+#define Py_INTERNAL_DICT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
 
 typedef struct {
     /* Cached hash code of me_key. */
@@ -62,4 +71,26 @@ struct _dictkeysobject {
        see the DK_ENTRIES() macro */
 };
 
+#define DK_LOG_SIZE(dk)  ((dk)->dk_log2_size)
+#if SIZEOF_VOID_P > 4
+#define DK_SIZE(dk)      (((int64_t)1)<<DK_LOG_SIZE(dk))
+#define DK_IXSIZE(dk)                     \
+    (DK_LOG_SIZE(dk) <= 7 ?               \
+        1 : DK_LOG_SIZE(dk) <= 15 ?       \
+            2 : DK_LOG_SIZE(dk) <= 31 ?   \
+                4 : sizeof(int64_t))
+#else
+#define DK_SIZE(dk)      (1<<DK_LOG_SIZE(dk))
+#define DK_IXSIZE(dk)                     \
+    (DK_LOG_SIZE(dk) <= 7 ?               \
+        1 : DK_LOG_SIZE(dk) <= 15 ?       \
+            2 : sizeof(int32_t))
+#endif
+#define DK_ENTRIES(dk) \
+    ((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
+
+
+#ifdef __cplusplus
+}
 #endif
+#endif   /* !Py_INTERNAL_DICT_H */
diff --git a/Include/opcode.h b/Include/opcode.h
index c65e2f41133fc6..8f5be99cae0c1b 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -136,6 +136,12 @@ extern "C" {
 #define DICT_MERGE              164
 #define DICT_UPDATE             165
 #define CALL_METHOD_KW          166
+#define JUMP_ABSOLUTE_QUICK       7
+#define LOAD_ATTR_ADAPTIVE        8
+#define LOAD_ATTR_SPLIT_KEYS     13
+#define LOAD_ATTR_WITH_HINT      14
+#define LOAD_ATTR_SLOT           18
+#define LOAD_ATTR_MODULE         21
 #ifdef NEED_OPCODE_JUMP_TABLES
 static uint32_t _PyOpcode_RelativeJump[8] = {
     0U,
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 4d5343179e5932..265759e60071ce 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -218,3 +218,12 @@ def jabs_op(name, op):
 def_op('CALL_METHOD_KW', 166)
 
 del def_op, name_op, jrel_op, jabs_op
+
+_specialized_instructions = [
+    "JUMP_ABSOLUTE_QUICK",
+    "LOAD_ATTR_ADAPTIVE",
+    "LOAD_ATTR_SPLIT_KEYS",
+    "LOAD_ATTR_WITH_HINT",
+    "LOAD_ATTR_SLOT",
+    "LOAD_ATTR_MODULE",
+]
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
index 1b18bfad553007..f4b7b8c13b7d38 100644
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -323,7 +323,7 @@ class C(): pass
                         break
         """
         rc, out, err = assert_python_ok('-c', code)
-        self.assertIn(b'MemoryError 1 10', out)
+        self.assertIn(b'MemoryError 1', out)
         self.assertIn(b'MemoryError 2 20', out)
         self.assertIn(b'MemoryError 3 30', out)
 
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 859b53947cab1a..97f21d454464bd 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -979,8 +979,7 @@ Objects/bytearrayobject.o: $(srcdir)/Objects/bytearrayobject.c $(BYTESTR_DEPS)
 
 Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c $(UNICODE_DEPS)
 
-Objects/odictobject.o: $(srcdir)/Objects/dict-common.h
-Objects/dictobject.o: $(srcdir)/Objects/stringlib/eq.h $(srcdir)/Objects/dict-common.h
+Objects/dictobject.o: $(srcdir)/Objects/stringlib/eq.h
 Objects/setobject.o: $(srcdir)/Objects/stringlib/eq.h
 
 .PHONY: regen-opcode-targets
@@ -1156,6 +1155,7 @@ PYTHON_HEADERS= \
 		$(srcdir)/Include/internal/pycore_compile.h \
 		$(srcdir)/Include/internal/pycore_condvar.h \
 		$(srcdir)/Include/internal/pycore_context.h \
+		$(srcdir)/Include/internal/pycore_dict.h \
 		$(srcdir)/Include/internal/pycore_dtoa.h \
 		$(srcdir)/Include/internal/pycore_fileutils.h \
 		$(srcdir)/Include/internal/pycore_format.h \
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst
new file mode 100644
index 00000000000000..2df082a078e309
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst	
@@ -0,0 +1,11 @@
+Initial implementation of adaptive specialization of LOAD_ATTR
+
+Four specialized forms of LOAD_ATTR are added:
+
+* LOAD_ATTR_SLOT
+
+* LOAD_ATTR_SPLIT_KEYS
+
+* LOAD_ATTR_WITH_HINT
+
+* LOAD_ATTR_MODULE
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index d97f9e2120d3fa..3a1dbc994b44b0 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -117,7 +117,7 @@ converting the dict to the combined table.
 #include "pycore_object.h"   // _PyObject_GC_TRACK()
 #include "pycore_pyerrors.h" // _PyErr_Fetch()
 #include "pycore_pystate.h"  // _PyThreadState_GET()
-#include "dict-common.h"
+#include "pycore_dict.h"
 #include "stringlib/eq.h"    // unicode_eq()
 
 /*[clinic input]
@@ -285,24 +285,6 @@ _PyDict_DebugMallocStats(FILE *out)
                            state->numfree, sizeof(PyDictObject));
 }
 
-#define DK_LOG_SIZE(dk)  ((dk)->dk_log2_size)
-#if SIZEOF_VOID_P > 4
-#define DK_SIZE(dk)      (((int64_t)1)<<DK_LOG_SIZE(dk))
-#define DK_IXSIZE(dk)                     \
-    (DK_LOG_SIZE(dk) <= 7 ?               \
-        1 : DK_LOG_SIZE(dk) <= 15 ?       \
-            2 : DK_LOG_SIZE(dk) <= 31 ?   \
-                4 : sizeof(int64_t))
-#else
-#define DK_SIZE(dk)      (1<<DK_LOG_SIZE(dk))
-#define DK_IXSIZE(dk)                     \
-    (DK_LOG_SIZE(dk) <= 7 ?               \
-        1 : DK_LOG_SIZE(dk) <= 15 ?       \
-            2 : sizeof(int32_t))
-#endif
-#define DK_ENTRIES(dk) \
-    ((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
-
 #define DK_MASK(dk) (DK_SIZE(dk)-1)
 #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
 
@@ -1544,10 +1526,10 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
     assert(hashpos >= 0);
 
     mp->ma_used--;
+    mp->ma_keys->dk_version = 0;
     mp->ma_version_tag = DICT_NEXT_VERSION();
     ep = &DK_ENTRIES(mp->ma_keys)[ix];
     dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
-    mp->ma_keys->dk_version = 0;
     old_key = ep->me_key;
     ep->me_key = NULL;
     ep->me_value = NULL;
diff --git a/Objects/odictobject.c b/Objects/odictobject.c
index 6a33910d9a89de..fb1ac0ce48dcfc 100644
--- a/Objects/odictobject.c
+++ b/Objects/odictobject.c
@@ -467,7 +467,7 @@ Potential Optimizations
 #include "Python.h"
 #include "pycore_object.h"
 #include <stddef.h>               // offsetof()
-#include "dict-common.h"
+#include "pycore_dict.h"
 #include <stddef.h>
 
 #include "clinic/odictobject.c.h"
diff --git a/Python/ceval.c b/Python/ceval.c
index a8abead23038ce..46133c9fe28dca 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -16,6 +16,7 @@
 #include "pycore_code.h"          // _PyCode_InitOpcache()
 #include "pycore_initconfig.h"    // _PyStatus_OK()
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
+#include "pycore_moduleobject.h"
 #include "pycore_pyerrors.h"      // _PyErr_Fetch()
 #include "pycore_pylifecycle.h"   // _PyErr_Print()
 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
@@ -24,6 +25,7 @@
 #include "pycore_tuple.h"         // _PyTuple_ITEMS()
 
 #include "code.h"
+#include "pycore_dict.h"
 #include "dictobject.h"
 #include "frameobject.h"
 #include "pycore_frame.h"
@@ -1554,8 +1556,15 @@ eval_frame_handle_pending(PyThreadState *tstate)
 #define OPCACHE_STAT_ATTR_DEOPT()
 #define OPCACHE_STAT_ATTR_TOTAL()
 
+#define JUMP_TO_INSTRUCTION(op) goto PREDICT_ID(op)
+
+#define GET_CACHE() \
+    _GetSpecializedCacheEntryForInstruction(first_instr, INSTR_OFFSET(), oparg)
+
 #endif
 
+#define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; }
+
 #define GLOBALS() specials[FRAME_SPECIALS_GLOBALS_OFFSET]
 #define BUILTINS() specials[FRAME_SPECIALS_BUILTINS_OFFSET]
 #define LOCALS() specials[FRAME_SPECIALS_LOCALS_OFFSET]
@@ -1574,7 +1583,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     int lastopcode = 0;
 #endif
     PyObject **stack_pointer;  /* Next free slot in value stack */
-    const _Py_CODEUNIT *next_instr;
+    _Py_CODEUNIT *next_instr;
     int opcode;        /* Current opcode */
     int oparg;         /* Current opcode argument, if any */
     PyObject **localsplus, **specials;
@@ -1582,7 +1591,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     _Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;
     PyCodeObject *co;
 
-    const _Py_CODEUNIT *first_instr;
+    _Py_CODEUNIT *first_instr;
     PyObject *names;
     PyObject *consts;
     _PyOpcache *co_opcache;
@@ -3443,196 +3452,129 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
         }
 
         case TARGET(LOAD_ATTR): {
+            PREDICTED(LOAD_ATTR);
             PyObject *name = GETITEM(names, oparg);
             PyObject *owner = TOP();
+            PyObject *res = PyObject_GetAttr(owner, name);
+            if (res == NULL) {
+                goto error;
+            }
+            Py_DECREF(owner);
+            SET_TOP(res);
+            DISPATCH();
+        }
 
-            PyTypeObject *type = Py_TYPE(owner);
-            PyObject *res;
-            PyObject **dictptr;
-            PyObject *dict;
-            _PyOpCodeOpt_LoadAttr *la;
-
-            OPCACHE_STAT_ATTR_TOTAL();
-
-            OPCACHE_CHECK();
-            if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
-            {
-                if (co_opcache->optimized > 0) {
-                    // Fast path -- cache hit makes LOAD_ATTR ~30% faster.
-                    la = &co_opcache->u.la;
-                    if (la->type == type && la->tp_version_tag == type->tp_version_tag)
-                    {
-                        // Hint >= 0 is a dict index; hint == -1 is a dict miss.
-                        // Hint < -1 is an inverted slot offset: offset is strictly > 0,
-                        // so ~offset is strictly < -1 (assuming 2's complement).
-                        if (la->hint < -1) {
-                            // Even faster path -- slot hint.
-                            Py_ssize_t offset = ~la->hint;
-                            // fprintf(stderr, "Using hint for offset %zd\n", offset);
-                            char *addr = (char *)owner + offset;
-                            res = *(PyObject **)addr;
-                            if (res != NULL) {
-                                Py_INCREF(res);
-                                SET_TOP(res);
-                                Py_DECREF(owner);
-                                DISPATCH();
-                            }
-                            // Else slot is NULL.  Fall through to slow path to raise AttributeError(name).
-                            // Don't DEOPT, since the slot is still there.
-                        } else {
-                            // Fast path for dict.
-                            assert(type->tp_dict != NULL);
-                            assert(type->tp_dictoffset > 0);
-
-                            dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
-                            dict = *dictptr;
-                            if (dict != NULL && PyDict_CheckExact(dict)) {
-                                Py_ssize_t hint = la->hint;
-                                Py_INCREF(dict);
-                                res = NULL;
-                                assert(!_PyErr_Occurred(tstate));
-                                la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res);
-                                if (res != NULL) {
-                                    assert(la->hint >= 0);
-                                    if (la->hint == hint && hint >= 0) {
-                                        // Our hint has helped -- cache hit.
-                                        OPCACHE_STAT_ATTR_HIT();
-                                    } else {
-                                        // The hint we provided didn't work.
-                                        // Maybe next time?
-                                        OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
-                                    }
-
-                                    Py_INCREF(res);
-                                    SET_TOP(res);
-                                    Py_DECREF(owner);
-                                    Py_DECREF(dict);
-                                    DISPATCH();
-                                }
-                                else {
-                                    _PyErr_Clear(tstate);
-                                    // This attribute can be missing sometimes;
-                                    // we don't want to optimize this lookup.
-                                    OPCACHE_DEOPT_LOAD_ATTR();
-                                    Py_DECREF(dict);
-                                }
-                            }
-                            else {
-                                // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact.
-                                OPCACHE_DEOPT_LOAD_ATTR();
-                            }
-                        }
-                    }
-                    else {
-                        // The type of the object has either been updated,
-                        // or is different.  Maybe it will stabilize?
-                        OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
-                    }
-                    OPCACHE_STAT_ATTR_MISS();
-                }
-
-                if (co_opcache != NULL && // co_opcache can be NULL after a DEOPT() call.
-                    type->tp_getattro == PyObject_GenericGetAttr)
-                {
-                    if (type->tp_dict == NULL) {
-                        if (PyType_Ready(type) < 0) {
-                            Py_DECREF(owner);
-                            SET_TOP(NULL);
-                            goto error;
-                        }
-                    }
-                    PyObject *descr = _PyType_Lookup(type, name);
-                    if (descr != NULL) {
-                        // We found an attribute with a data-like descriptor.
-                        PyTypeObject *dtype = Py_TYPE(descr);
-                        if (dtype == &PyMemberDescr_Type) {  // It's a slot
-                            PyMemberDescrObject *member = (PyMemberDescrObject *)descr;
-                            struct PyMemberDef *dmem = member->d_member;
-                            if (dmem->type == T_OBJECT_EX) {
-                                Py_ssize_t offset = dmem->offset;
-                                assert(offset > 0);  // 0 would be confused with dict hint == -1 (miss).
-
-                                if (co_opcache->optimized == 0) {
-                                    // First time we optimize this opcode.
-                                    OPCACHE_STAT_ATTR_OPT();
-                                    co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
-                                    // fprintf(stderr, "Setting hint for %s, offset %zd\n", dmem->name, offset);
-                                }
-
-                                la = &co_opcache->u.la;
-                                la->type = type;
-                                la->tp_version_tag = type->tp_version_tag;
-                                la->hint = ~offset;
-
-                                char *addr = (char *)owner + offset;
-                                res = *(PyObject **)addr;
-                                if (res != NULL) {
-                                    Py_INCREF(res);
-                                    Py_DECREF(owner);
-                                    SET_TOP(res);
-
-                                    DISPATCH();
-                                }
-                                // Else slot is NULL.  Fall through to slow path to raise AttributeError(name).
-                            }
-                            // Else it's a slot of a different type.  We don't handle those.
-                        }
-                        // Else it's some other kind of descriptor that we don't handle.
-                        OPCACHE_DEOPT_LOAD_ATTR();
-                    }
-                    else if (type->tp_dictoffset > 0) {
-                        // We found an instance with a __dict__.
-                        dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
-                        dict = *dictptr;
-
-                        if (dict != NULL && PyDict_CheckExact(dict)) {
-                            Py_INCREF(dict);
-                            res = NULL;
-                            assert(!_PyErr_Occurred(tstate));
-                            Py_ssize_t hint = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res);
-                            if (res != NULL) {
-                                Py_INCREF(res);
-                                Py_DECREF(dict);
-                                Py_DECREF(owner);
-                                SET_TOP(res);
-
-                                if (co_opcache->optimized == 0) {
-                                    // First time we optimize this opcode.
-                                    OPCACHE_STAT_ATTR_OPT();
-                                    co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
-                                }
-
-                                la = &co_opcache->u.la;
-                                la->type = type;
-                                la->tp_version_tag = type->tp_version_tag;
-                                assert(hint >= 0);
-                                la->hint = hint;
-
-                                DISPATCH();
-                            }
-                            else {
-                                _PyErr_Clear(tstate);
-                            }
-                            Py_DECREF(dict);
-                        } else {
-                            // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact.
-                            OPCACHE_DEOPT_LOAD_ATTR();
-                        }
-                    } else {
-                        // The object's class does not have a tp_dictoffset we can use.
-                        OPCACHE_DEOPT_LOAD_ATTR();
-                    }
-                } else if (type->tp_getattro != PyObject_GenericGetAttr) {
-                    OPCACHE_DEOPT_LOAD_ATTR();
+        case TARGET(LOAD_ATTR_ADAPTIVE): {
+            SpecializedCacheEntry *cache = GET_CACHE();
+            if (cache->adaptive.counter == 0) {
+                PyObject *owner = TOP();
+                PyObject *name = GETITEM(names, cache->adaptive.original_oparg);
+                next_instr--;
+                if (_Py_Specialize_LoadAttr(owner, next_instr, name, cache) < 0) {
+                    goto error;
                 }
+                DISPATCH();
             }
+            else {
+                STAT_INC(loadattr_deferred);
+                cache->adaptive.counter--;
+                oparg = cache->adaptive.original_oparg;
+                JUMP_TO_INSTRUCTION(LOAD_ATTR);
+            }
+        }
+
+        case TARGET(LOAD_ATTR_SPLIT_KEYS): {
+            PyObject *owner = TOP();
+            PyObject *res;
+            PyTypeObject *tp = Py_TYPE(owner);
+            SpecializedCacheEntry *caches = GET_CACHE();
+            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+            _PyLoadAttrCache *cache1 = &caches[-1].load_attr;
+            assert(cache1->tp_version != 0);
+            DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR);
+            assert(tp->tp_dictoffset > 0);
+            PyDictObject *dict = *(PyDictObject **)(((char *)owner) + tp->tp_dictoffset);
+            DEOPT_IF(dict == NULL, LOAD_ATTR);
+            assert(PyDict_CheckExact((PyObject *)dict));
+            DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR);
+            res = dict->ma_values[cache0->index];
+            DEOPT_IF(res == NULL, LOAD_ATTR);
+            STAT_INC(loadattr_hit);
+            record_cache_hit(cache0);
+            STAT_INC(loadattr_hit);
+            Py_INCREF(res);
+            SET_TOP(res);
+            Py_DECREF(owner);
+            DISPATCH();
+        }
 
-            // Slow path.
-            res = PyObject_GetAttr(owner, name);
+        case TARGET(LOAD_ATTR_MODULE): {
+            PyObject *owner = TOP();
+            PyObject *res;
+            SpecializedCacheEntry *caches = GET_CACHE();
+            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+            _PyLoadAttrCache *cache1 = &caches[-1].load_attr;
+            DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR);
+            PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
+            DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR);
+            assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
+            assert(cache0->index < dict->ma_keys->dk_nentries);
+            PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
+            res = ep->me_value;
+            DEOPT_IF(res == NULL, LOAD_ATTR);
+            STAT_INC(loadattr_hit);
+            record_cache_hit(cache0);
+            Py_INCREF(res);
+            SET_TOP(res);
             Py_DECREF(owner);
+            DISPATCH();
+        }
+
+        case TARGET(LOAD_ATTR_WITH_HINT): {
+            PyObject *owner = TOP();
+            PyObject *res;
+            PyTypeObject *tp = Py_TYPE(owner);
+            SpecializedCacheEntry *caches = GET_CACHE();
+            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+            _PyLoadAttrCache *cache1 = &caches[-1].load_attr;
+            assert(cache1->tp_version != 0);
+            DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR);
+            assert(tp->tp_dictoffset > 0);
+            PyDictObject *dict = *(PyDictObject **)(((char *)owner) + tp->tp_dictoffset);
+            DEOPT_IF(dict == NULL, LOAD_ATTR);
+            assert(PyDict_CheckExact((PyObject *)dict));
+            PyObject *name = GETITEM(names, cache0->original_oparg);
+            uint32_t hint = cache1->dk_version_or_hint;
+            DEOPT_IF(hint >= dict->ma_keys->dk_nentries, LOAD_ATTR);
+            PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
+            DEOPT_IF(ep->me_key != name, LOAD_ATTR);
+            res = ep->me_value;
+            DEOPT_IF(res == NULL, LOAD_ATTR);
+            STAT_INC(loadattr_hit);
+            record_cache_hit(cache0);
+            Py_INCREF(res);
             SET_TOP(res);
-            if (res == NULL)
-                goto error;
+            Py_DECREF(owner);
+            DISPATCH();
+        }
+
+        case TARGET(LOAD_ATTR_SLOT): {
+            PyObject *owner = TOP();
+            PyObject *res;
+            PyTypeObject *tp = Py_TYPE(owner);
+            SpecializedCacheEntry *caches = GET_CACHE();
+            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+            _PyLoadAttrCache *cache1 = &caches[-1].load_attr;
+            assert(cache1->tp_version != 0);
+            DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR);
+            char *addr = (char *)owner + cache0->index;
+            res = *(PyObject **)addr;
+            DEOPT_IF(res == NULL, LOAD_ATTR);
+            STAT_INC(loadattr_hit);
+            record_cache_hit(cache0);
+            Py_INCREF(res);
+            SET_TOP(res);
+            Py_DECREF(owner);
             DISPATCH();
         }
 
@@ -3879,6 +3821,27 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
 
         case TARGET(JUMP_ABSOLUTE): {
             PREDICTED(JUMP_ABSOLUTE);
+            if (oparg < INSTR_OFFSET()) {
+                /* Increment the warmup counter and quicken if warm enough.
+                 * _Py_Quicken is idempotent so we don't worry about overflow. */
+                if (!PyCodeObject_IsWarmedUp(co)) {
+                    PyCodeObject_IncrementWarmup(co);
+                    if (PyCodeObject_IsWarmedUp(co)) {
+                        if (_Py_Quicken(co)) {
+                            goto error;
+                        }
+                        int nexti = INSTR_OFFSET();
+                        first_instr = co->co_firstinstr;
+                        next_instr = first_instr + nexti;
+                    }
+                }
+            }
+            JUMPTO(oparg);
+            CHECK_EVAL_BREAKER();
+            DISPATCH();
+        }
+
+        case TARGET(JUMP_ABSOLUTE_QUICK): {
             JUMPTO(oparg);
             CHECK_EVAL_BREAKER();
             DISPATCH();
@@ -4494,6 +4457,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
            or goto error. */
         Py_UNREACHABLE();
 
+/* Cache misses */
+
+LOAD_ATTR_miss:
+    {
+        STAT_INC(loadattr_miss);
+        _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive;
+        record_cache_miss(cache);
+        if (too_many_cache_misses(cache)) {
+            next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1]));
+            STAT_INC(loadattr_deopt);
+            cache_backoff(cache);
+        }
+        oparg = cache->original_oparg;
+        JUMP_TO_INSTRUCTION(LOAD_ATTR);
+    }
+
 error:
         /* Double-check exception status. */
 #ifdef NDEBUG
@@ -4515,6 +4494,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
             call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj,
                            tstate, f, &trace_info);
         }
+
 exception_unwind:
         f->f_state = FRAME_UNWINDING;
         /* We can't use f->f_lasti here, as RERAISE may have set it */
diff --git a/Python/makeopcodetargets.py b/Python/makeopcodetargets.py
index 023c9e6c9f1adc..189d72a8c84af3 100755
--- a/Python/makeopcodetargets.py
+++ b/Python/makeopcodetargets.py
@@ -34,6 +34,11 @@ def write_contents(f):
     targets = ['_unknown_opcode'] * 256
     for opname, op in opcode.opmap.items():
         targets[op] = "TARGET_%s" % opname
+    next_op = 1
+    for opname in opcode._specialized_instructions:
+        while targets[next_op] != '_unknown_opcode':
+            next_op += 1
+        targets[next_op] = "TARGET_%s" % opname
     f.write("static void *opcode_targets[256] = {\n")
     f.write(",\n".join(["    &&%s" % s for s in targets]))
     f.write("\n};\n")
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 94b2a7c9b6e930..47beee7d59dbcc 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -6,21 +6,21 @@ static void *opcode_targets[256] = {
     &&TARGET_DUP_TOP,
     &&TARGET_DUP_TOP_TWO,
     &&TARGET_ROT_FOUR,
-    &&_unknown_opcode,
-    &&_unknown_opcode,
+    &&TARGET_JUMP_ABSOLUTE_QUICK,
+    &&TARGET_LOAD_ATTR_ADAPTIVE,
     &&TARGET_NOP,
     &&TARGET_UNARY_POSITIVE,
     &&TARGET_UNARY_NEGATIVE,
     &&TARGET_UNARY_NOT,
-    &&_unknown_opcode,
-    &&_unknown_opcode,
+    &&TARGET_LOAD_ATTR_SPLIT_KEYS,
+    &&TARGET_LOAD_ATTR_WITH_HINT,
     &&TARGET_UNARY_INVERT,
     &&TARGET_BINARY_MATRIX_MULTIPLY,
     &&TARGET_INPLACE_MATRIX_MULTIPLY,
-    &&_unknown_opcode,
+    &&TARGET_LOAD_ATTR_SLOT,
     &&TARGET_BINARY_POWER,
     &&TARGET_BINARY_MULTIPLY,
-    &&_unknown_opcode,
+    &&TARGET_LOAD_ATTR_MODULE,
     &&TARGET_BINARY_MODULO,
     &&TARGET_BINARY_ADD,
     &&TARGET_BINARY_SUBTRACT,
diff --git a/Python/specialize.c b/Python/specialize.c
index 07152d80538307..1801e6620f1e3c 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -1,7 +1,10 @@
 
 #include "Python.h"
 #include "pycore_code.h"
+#include "pycore_dict.h"
+#include "pycore_moduleobject.h"
 #include "opcode.h"
+#include "structmember.h"         // struct PyMemberDef, T_OFFSET_EX
 
 
 /* We layout the quickened data as a bi-directional array:
@@ -29,6 +32,22 @@
 */
 
 Py_ssize_t _Py_QuickenedCount = 0;
+#if SPECIALIZATION_STATS
+SpecializationStats _specialization_stats = { 0 };
+/* PRINT_STAT(name): write "<name> : <value>" for one _specialization_stats field to stderr (caller adds ';'). */
+#define PRINT_STAT(name) fprintf(stderr, #name " : %" PRIu64" \n", _specialization_stats.name)
+void
+_Py_PrintSpecializationStats(void)
+{
+    PRINT_STAT(specialization_success);
+    PRINT_STAT(specialization_failure);
+    PRINT_STAT(loadattr_hit);
+    PRINT_STAT(loadattr_deferred);
+    PRINT_STAT(loadattr_miss);
+    PRINT_STAT(loadattr_deopt);
+}
+
+#endif
 
 static SpecializedCacheOrInstruction *
 allocate(int cache_count, int instruction_count)
@@ -56,10 +75,14 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
 
 /* Map from opcode to adaptive opcode.
   Values of zero are ignored. */
-static uint8_t adaptive_opcodes[256] = { 0 };
+static uint8_t adaptive_opcodes[256] = {
+    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
+};
 
 /* The number of cache entries required for a "family" of instructions. */
-static uint8_t cache_requirements[256] = { 0 };
+static uint8_t cache_requirements[256] = {
+    [LOAD_ATTR] = 2,
+};
 
 /* Return the oparg for the cache_offset and instruction index.
  *
@@ -158,6 +181,9 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
             /* Super instructions don't use the cache,
              * so no need to update the offset. */
             switch (opcode) {
+                case JUMP_ABSOLUTE:
+                    instructions[i] = _Py_MAKECODEUNIT(JUMP_ABSOLUTE_QUICK, oparg);
+                    break;
                 /* Insert superinstructions here
                  E.g.
                 case LOAD_FAST:
@@ -195,3 +221,150 @@ _Py_Quicken(PyCodeObject *code) {
     return 0;
 }
 
+/* Attempt to specialize LOAD_ATTR for a module object `owner`.
+ * On success the dict keys version is stored in cache1, the lookup index in
+ * cache0, and *instr becomes LOAD_ATTR_MODULE.  Returns 0 on success, else -1. */
+static int
+specialize_module_load_attr(
+    PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
+    _PyAdaptiveEntry *cache0, _PyLoadAttrCache *cache1)
+{
+    _Py_IDENTIFIER(__getattr__);
+    PyObject *found = NULL;
+    PyDictObject *mod_dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
+    if (mod_dict == NULL || mod_dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
+        return -1;
+    }
+    PyObject *hook_name = _PyUnicode_FromId(&PyId___getattr__); /* borrowed */
+    if (hook_name == NULL) {
+        PyErr_Clear();
+        return -1;
+    }
+    /* Give up unless the "__getattr__" lookup in the module dict is DKIX_EMPTY. */
+    Py_ssize_t slot = _PyDict_GetItemHint(mod_dict, hook_name, -1, &found);
+    assert(slot != DKIX_ERROR);
+    if (slot != DKIX_EMPTY) {
+        return -1;
+    }
+    slot = _PyDict_GetItemHint(mod_dict, name, -1, &found);
+    assert (slot != DKIX_ERROR);
+    /* Rejects negative results and indices that do not fit cache0->index. */
+    if (slot != (uint16_t)slot) {
+        return -1;
+    }
+    uint32_t version = _PyDictKeys_GetVersionForCurrentState(mod_dict);
+    if (version == 0) {
+        return -1;
+    }
+    cache0->index = (uint16_t)slot;
+    cache1->dk_version_or_hint = version;
+    *instr = _Py_MAKECODEUNIT(LOAD_ATTR_MODULE, _Py_OPARG(*instr));
+    return 0;
+}
+
+/* Try to specialize the LOAD_ATTR instruction at *instr for `owner`.
+ * On success *instr is rewritten to a LOAD_ATTR_* variant and the caches are
+ * filled in; otherwise cache_backoff() is applied to the adaptive entry.
+ * Returns 0 in both cases, or -1 if PyType_Ready() or hashing `name` fails. */
+int
+_Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache)
+{
+    _PyAdaptiveEntry *cache0 = &cache->adaptive;
+    _PyLoadAttrCache *cache1 = &cache[-1].load_attr;
+    if (PyModule_CheckExact(owner)) {
+        if (specialize_module_load_attr(owner, instr, name, cache0, cache1)) {
+            goto fail;
+        }
+        goto success;
+    }
+    PyTypeObject *type = Py_TYPE(owner);
+    if (type->tp_getattro != PyObject_GenericGetAttr) {
+        goto fail;
+    }
+    if (type->tp_dict == NULL && PyType_Ready(type) < 0) {
+        return -1;
+    }
+    PyObject *descr = _PyType_Lookup(type, name);
+    if (descr != NULL) {
+        // _PyType_Lookup() found `name`; only member descriptors are specialized.
+        if (Py_TYPE(descr) != &PyMemberDescr_Type) {
+            goto fail;
+        }
+        // It's a slot
+        PyMemberDescrObject *member = (PyMemberDescrObject *)descr;
+        struct PyMemberDef *dmem = member->d_member;
+        if (dmem->type != T_OBJECT_EX) {
+            // It's a slot of a different type.  We don't handle those.
+            goto fail;
+        }
+        Py_ssize_t offset = dmem->offset;
+        if (offset != (uint16_t)offset) {
+            goto fail;
+        }
+        assert(offset > 0);
+        cache0->index = (uint16_t)offset;
+        cache1->tp_version = type->tp_version_tag;
+        *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr));
+        goto success;
+    }
+    // No descriptor
+    if (type->tp_dictoffset <= 0) {
+        // No dictionary, or computed offset dictionary
+        goto fail;
+    }
+    PyObject **dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
+    if (*dictptr == NULL || !PyDict_CheckExact(*dictptr)) {
+        goto fail;
+    }
+    // We found an instance with a __dict__.
+    PyDictObject *dict = (PyDictObject *)*dictptr;
+    if ((type->tp_flags & Py_TPFLAGS_HEAPTYPE)
+        && dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys) {
+        // Keys are shared
+        assert(PyUnicode_CheckExact(name));
+        Py_hash_t hash = PyObject_Hash(name);
+        if (hash == -1) {
+            return -1;
+        }
+        PyObject *value;
+        Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value);
+        assert (index != DKIX_ERROR);
+        if (index != (uint16_t)index) {
+            goto fail;
+        }
+        uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
+        if (keys_version == 0) {
+            goto fail;
+        }
+        cache1->dk_version_or_hint = keys_version;
+        cache1->tp_version = type->tp_version_tag;
+        cache0->index = (uint16_t)index;
+        *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SPLIT_KEYS, _Py_OPARG(*instr));
+        goto success;
+    }
+    else {
+        PyObject *value = NULL;
+        Py_ssize_t hint = _PyDict_GetItemHint(dict, name, -1, &value);
+        // Test the sign explicitly: where Py_ssize_t is 32 bits wide, a negative
+        // hint is converted to unsigned and compares equal to its uint32_t cast.
+        if (hint < 0 || hint != (uint32_t)hint) {
+            goto fail;
+        }
+        cache1->dk_version_or_hint = (uint32_t)hint;
+        cache1->tp_version = type->tp_version_tag;
+        *instr = _Py_MAKECODEUNIT(LOAD_ATTR_WITH_HINT, _Py_OPARG(*instr));
+        goto success;
+    }
+
+fail:
+    STAT_INC(specialization_failure);
+    assert(!PyErr_Occurred());
+    cache_backoff(cache0);
+    return 0;
+success:
+    STAT_INC(specialization_success);
+    assert(!PyErr_Occurred());
+    cache0->counter = saturating_start();
+    return 0;
+}
+
diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py
index 290f6251af1745..41ae3fe6e53686 100644
--- a/Tools/scripts/generate_opcode_h.py
+++ b/Tools/scripts/generate_opcode_h.py
@@ -53,6 +53,10 @@ def main(opcode_py, outfile='Include/opcode.h'):
     opmap = opcode['opmap']
     hasjrel = opcode['hasjrel']
     hasjabs = opcode['hasjabs']
+    used = [ False ] * 256
+    next_op = 1
+    for name, op in opmap.items():
+        used[op] = True
     with open(outfile, 'w') as fobj:
         fobj.write(header)
         for name in opcode['opname']:
@@ -61,6 +65,11 @@ def main(opcode_py, outfile='Include/opcode.h'):
             if name == 'POP_EXCEPT': # Special entry for HAVE_ARGUMENT
                 fobj.write("#define %-23s %3d\n" %
                             ('HAVE_ARGUMENT', opcode['HAVE_ARGUMENT']))
+        for name in opcode['_specialized_instructions']:
+            while used[next_op]:
+                next_op += 1
+            fobj.write("#define %-23s %3s\n" % (name, next_op))
+            used[next_op] = True
         fobj.write("#ifdef NEED_OPCODE_JUMP_TABLES\n")
         write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], fobj)
         write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], fobj)



More information about the Python-checkins mailing list