[Python-checkins] GH-106485: Dematerialize instance dictionaries when possible (GH-106539)

brandtbucher webhook-mailer at python.org
Wed Aug 9 15:14:55 EDT 2023


https://github.com/python/cpython/commit/326f0ba1c5dda1d9613dbba11ea2470654b0d9c8
commit: 326f0ba1c5dda1d9613dbba11ea2470654b0d9c8
branch: main
author: Brandt Bucher <brandtbucher at microsoft.com>
committer: brandtbucher <brandtbucher at gmail.com>
date: 2023-08-09T19:14:50Z
summary:

GH-106485: Dematerialize instance dictionaries when possible (GH-106539)

files:
A Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst
M Include/internal/pycore_dict.h
M Include/pystats.h
M Lib/test/test_opcache.py
M Objects/dictobject.c
M Objects/typeobject.c
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/specialize.c

diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 26a913435ef9c..6ae81c033c43a 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -10,6 +10,7 @@ extern "C" {
 #endif
 
 #include "pycore_dict_state.h"
+#include "pycore_object.h"
 #include "pycore_runtime.h"         // _PyRuntime
 
 // Unsafe flavor of PyDict_GetItemWithError(): no error checking
@@ -62,6 +63,8 @@ extern uint32_t _PyDictKeys_GetVersionForCurrentState(
 
 extern size_t _PyDict_KeysSize(PyDictKeysObject *keys);
 
+extern void _PyDictKeys_DecRef(PyDictKeysObject *keys);
+
 /* _Py_dict_lookup() returns index of entry which can be used like DK_ENTRIES(dk)[index].
  * -1 when no entry found, -3 when compare raises error.
  */
@@ -196,6 +199,7 @@ _PyDict_NotifyEvent(PyInterpreterState *interp,
 }
 
 extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
+extern int _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv);
 extern PyObject *_PyDict_FromItems(
         PyObject *const *keys, Py_ssize_t keys_offset,
         PyObject *const *values, Py_ssize_t values_offset,
diff --git a/Include/pystats.h b/Include/pystats.h
index e24aef5fe8072..b1957596745f0 100644
--- a/Include/pystats.h
+++ b/Include/pystats.h
@@ -65,6 +65,7 @@ typedef struct _object_stats {
     uint64_t dict_materialized_new_key;
     uint64_t dict_materialized_too_big;
     uint64_t dict_materialized_str_subclass;
+    uint64_t dict_dematerialized;
     uint64_t type_cache_hits;
     uint64_t type_cache_misses;
     uint64_t type_cache_dunder_hits;
diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index 564dc4745ae64..7d317c012a304 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -865,8 +865,10 @@ class C:
             items = []
             for _ in range(self.ITEMS):
                 item = C()
-                item.__dict__
                 item.a = None
+                # Resize into a combined unicode dict:
+                for i in range(29):
+                    setattr(item, f"_{i}", None)
                 items.append(item)
             return items
 
@@ -932,7 +934,9 @@ class C:
             items = []
             for _ in range(self.ITEMS):
                 item = C()
-                item.__dict__
+                # Resize into a combined unicode dict:
+                for i in range(29):
+                    setattr(item, f"_{i}", None)
                 items.append(item)
             return items
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst b/Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst
new file mode 100644
index 0000000000000..1f80082821eda
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst	
@@ -0,0 +1,2 @@
+Reduce the number of materialized instances dictionaries by dematerializing
+them when possible.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 41ae1fca90b46..931103cdc1a06 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -5464,6 +5464,35 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values)
     return make_dict_from_instance_attributes(interp, keys, values);
 }
 
+// Return 1 if the dict was dematerialized, 0 otherwise.
+int
+_PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)
+{
+    assert(_PyObject_DictOrValuesPointer(obj) == dorv);
+    assert(!_PyDictOrValues_IsValues(*dorv));
+    PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv);
+    if (dict == NULL) {
+        return 0;
+    }
+    // It's likely that this dict still shares its keys (if it was materialized
+    // on request and not heavily modified):
+    assert(PyDict_CheckExact(dict));
+    assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE));
+    if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) {
+        return 0;
+    }
+    assert(dict->ma_values);
+    // We have an opportunity to do something *really* cool: dematerialize it!
+    _PyDictKeys_DecRef(dict->ma_keys);
+    _PyDictOrValues_SetValues(dorv, dict->ma_values);
+    OBJECT_STAT_INC(dict_dematerialized);
+    // Don't try this at home, kids:
+    dict->ma_keys = NULL;
+    dict->ma_values = NULL;
+    Py_DECREF(dict);
+    return 1;
+}
+
 int
 _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
                               PyObject *name, PyObject *value)
@@ -5688,6 +5717,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context)
             dict = _PyDictOrValues_GetDict(*dorv_ptr);
             if (dict == NULL) {
                 dictkeys_incref(CACHED_KEYS(tp));
+                OBJECT_STAT_INC(dict_materialized_on_request);
                 dict = new_dict_with_shared_keys(interp, CACHED_KEYS(tp));
                 dorv_ptr->dict = dict;
             }
@@ -5731,6 +5761,9 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
         dict = *dictptr;
         if (dict == NULL) {
             dictkeys_incref(cached);
+            if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) {
+                OBJECT_STAT_INC(dict_materialized_on_request);
+            }
             dict = new_dict_with_shared_keys(interp, cached);
             if (dict == NULL)
                 return -1;
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 76809494dd881..71e96f5af4e41 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -4965,9 +4965,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
     return res;
 }
 
-extern void
-_PyDictKeys_DecRef(PyDictKeysObject *keys);
-
 
 static void
 type_dealloc_common(PyTypeObject *type)
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index b2281abc6663d..5efa36fcf5c62 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -1827,8 +1827,10 @@ dummy_func(
         op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) {
             assert(Py_TYPE(owner)->tp_dictoffset < 0);
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                     LOAD_ATTR);
         }
 
         op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) {
@@ -2727,8 +2729,10 @@ dummy_func(
             assert(type_version != 0);
             DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
             assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                     LOAD_ATTR);
             PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
             DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
                      keys_version, LOAD_ATTR);
@@ -2757,8 +2761,10 @@ dummy_func(
             assert(type_version != 0);
             DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
             assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                     LOAD_ATTR);
             PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
             DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
                      keys_version, LOAD_ATTR);
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index d6d541a3b61ab..a7b5054417ed8 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -1623,8 +1623,10 @@
             owner = stack_pointer[-1];
             assert(Py_TYPE(owner)->tp_dictoffset < 0);
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                     LOAD_ATTR);
             break;
         }
 
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index cf20b869b8182..ccf43c727b9e0 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2366,8 +2366,10 @@
             {
                 assert(Py_TYPE(owner)->tp_dictoffset < 0);
                 assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-                PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-                DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+                PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+                DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                         !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                         LOAD_ATTR);
             }
             // _LOAD_ATTR_INSTANCE_VALUE
             {
@@ -3507,8 +3509,10 @@
             assert(type_version != 0);
             DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
             assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                     LOAD_ATTR);
             PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
             DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
                      keys_version, LOAD_ATTR);
@@ -3559,8 +3563,10 @@
             assert(type_version != 0);
             DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
             assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
+                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
+                     LOAD_ATTR);
             PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
             DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
                      keys_version, LOAD_ATTR);
diff --git a/Python/specialize.c b/Python/specialize.c
index 98455ae3efc60..2d514c0dc476d 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -192,6 +192,7 @@ print_object_stats(FILE *out, ObjectStats *stats)
     fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key);
     fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big);
     fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass);
+    fprintf(out, "Object dematerialize dict: %" PRIu64 "\n", stats->dict_dematerialized);
     fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits);
     fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses);
     fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
@@ -685,8 +686,10 @@ specialize_dict_access(
         return 0;
     }
     _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
-    PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-    if (_PyDictOrValues_IsValues(dorv)) {
+    PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+    if (_PyDictOrValues_IsValues(*dorv) ||
+        _PyObject_MakeInstanceAttributesFromDict(owner, dorv))
+    {
         // Virtual dictionary
         PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys;
         assert(PyUnicode_CheckExact(name));
@@ -704,12 +707,16 @@ specialize_dict_access(
         instr->op.code = values_op;
     }
     else {
-        PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
+        PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv);
         if (dict == NULL || !PyDict_CheckExact(dict)) {
             SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NO_DICT);
             return 0;
         }
         // We found an instance with a __dict__.
+        if (dict->ma_values) {
+            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NON_STRING_OR_SPLIT);
+            return 0;
+        }
         Py_ssize_t index =
             _PyDict_LookupIndex(dict, name);
         if (index != (uint16_t)index) {
@@ -1100,9 +1107,11 @@ PyObject *descr, DescriptorClassification kind, bool is_method)
     assert(descr != NULL);
     assert((is_method && kind == METHOD) || (!is_method && kind == NON_DESCRIPTOR));
     if (owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
-        PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+        PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
         PyDictKeysObject *keys = ((PyHeapTypeObject *)owner_cls)->ht_cached_keys;
-        if (!_PyDictOrValues_IsValues(dorv)) {
+        if (!_PyDictOrValues_IsValues(*dorv) &&
+            !_PyObject_MakeInstanceAttributesFromDict(owner, dorv))
+        {
             SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_HAS_MANAGED_DICT);
             return 0;
         }



More information about the Python-checkins mailing list