[Python-checkins] GH-106485: Handle dict subclasses correctly when dematerializing `__dict__` (GH-107837)

markshannon webhook-mailer at python.org
Thu Aug 10 08:34:05 EDT 2023


https://github.com/python/cpython/commit/1d976b2da26cd62b5d0e860e3055df6fb893577f
commit: 1d976b2da26cd62b5d0e860e3055df6fb893577f
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2023-08-10T13:34:00+01:00
summary:

GH-106485: Handle dict subclasses correctly when dematerializing `__dict__` (GH-107837)

files:
M Include/internal/pycore_dict.h
M Include/internal/pycore_global_objects_fini_generated.h
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init_generated.h
M Lib/test/test_opcache.py
M Modules/_testinternalcapi.c
M Objects/dictobject.c

diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 6ae81c033c43a..2ad6ef0f7c04d 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -199,7 +199,7 @@ _PyDict_NotifyEvent(PyInterpreterState *interp,
 }
 
 extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
-extern int _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv);
+extern bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv);
 extern PyObject *_PyDict_FromItems(
         PyObject *const *keys, Py_ssize_t keys_offset,
         PyObject *const *values, Py_ssize_t values_offset,
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 6d50ffd0a02f1..ee9010583ff8b 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -547,6 +547,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_lambda));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_listcomp));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_module));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_null));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_setcomp));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_string));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_unknown));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index bb1fb13f342fc..b081c0e023fa4 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -33,6 +33,7 @@ struct _Py_global_strings {
         STRUCT_FOR_STR(anon_lambda, "<lambda>")
         STRUCT_FOR_STR(anon_listcomp, "<listcomp>")
         STRUCT_FOR_STR(anon_module, "<module>")
+        STRUCT_FOR_STR(anon_null, "<NULL>")
         STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
         STRUCT_FOR_STR(anon_string, "<string>")
         STRUCT_FOR_STR(anon_unknown, "<unknown>")
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 2d66647438b19..8c9c7f753d857 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -539,6 +539,7 @@ extern "C" {
     INIT_STR(anon_lambda, "<lambda>"), \
     INIT_STR(anon_listcomp, "<listcomp>"), \
     INIT_STR(anon_module, "<module>"), \
+    INIT_STR(anon_null, "<NULL>"), \
     INIT_STR(anon_setcomp, "<setcomp>"), \
     INIT_STR(anon_string, "<string>"), \
     INIT_STR(anon_unknown, "<unknown>"), \
diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index 7d317c012a304..1baa10cbdfdef 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -1,8 +1,11 @@
+import copy
+import pickle
 import dis
 import threading
 import types
 import unittest
 from test.support import threading_helper
+import _testinternalcapi
 
 
 class TestLoadSuperAttrCache(unittest.TestCase):
@@ -997,6 +1000,124 @@ def write(items):
         opname = "UNPACK_SEQUENCE_LIST"
         self.assert_races_do_not_crash(opname, get_items, read, write)
 
+class C:
+    pass
+
+class TestInstanceDict(unittest.TestCase):
+
+    def setUp(self):
+        c = C()
+        c.a, c.b, c.c = 0,0,0
+
+    def test_values_on_instance(self):
+        c = C()
+        c.a = 1
+        C().b = 2
+        c.c = 3
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c),
+            (1, '<NULL>', 3)
+        )
+
+    def test_dict_materialization(self):
+        c = C()
+        c.a = 1
+        c.b = 2
+        c.__dict__
+        self.assertIs(
+            _testinternalcapi.get_object_dict_values(c),
+            None
+        )
+
+    def test_dict_dematerialization(self):
+        c = C()
+        c.a = 1
+        c.b = 2
+        c.__dict__
+        self.assertIs(
+            _testinternalcapi.get_object_dict_values(c),
+            None
+        )
+        for _ in range(100):
+            c.a
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c),
+            (1, 2, '<NULL>')
+        )
+
+    def test_dict_dematerialization_multiple_refs(self):
+        c = C()
+        c.a = 1
+        c.b = 2
+        d = c.__dict__
+        for _ in range(100):
+            c.a
+        self.assertIs(
+            _testinternalcapi.get_object_dict_values(c),
+            None
+        )
+        self.assertIs(c.__dict__, d)
+
+    def test_dict_dematerialization_copy(self):
+        c = C()
+        c.a = 1
+        c.b = 2
+        c2 = copy.copy(c)
+        for _ in range(100):
+            c.a
+            c2.a
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c),
+            (1, 2, '<NULL>')
+        )
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c2),
+            (1, 2, '<NULL>')
+        )
+        c3 = copy.deepcopy(c)
+        for _ in range(100):
+            c.a
+            c3.a
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c),
+            (1, 2, '<NULL>')
+        )
+        #NOTE -- c3.__dict__ does not de-materialize
+
+    def test_dict_dematerialization_pickle(self):
+        c = C()
+        c.a = 1
+        c.b = 2
+        c2 = pickle.loads(pickle.dumps(c))
+        for _ in range(100):
+            c.a
+            c2.a
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c),
+            (1, 2, '<NULL>')
+        )
+        self.assertEqual(
+            _testinternalcapi.get_object_dict_values(c2),
+            (1, 2, '<NULL>')
+        )
+
+    def test_dict_dematerialization_subclass(self):
+        class D(dict): pass
+        c = C()
+        c.a = 1
+        c.b = 2
+        c.__dict__ = D(c.__dict__)
+        for _ in range(100):
+            c.a
+        self.assertIs(
+            _testinternalcapi.get_object_dict_values(c),
+            None
+        )
+        self.assertEqual(
+            c.__dict__,
+            {'a':1, 'b':2}
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index f28b46dd9846c..d1082c7dae8ae 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -15,6 +15,7 @@
 #include "pycore_bytesobject.h"  // _PyBytes_Find()
 #include "pycore_compile.h"      // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble, _PyCompile_CleanDoc
 #include "pycore_ceval.h"        // _PyEval_AddPendingCall
+#include "pycore_dict.h"        // _PyDictOrValues_GetValues
 #include "pycore_fileutils.h"    // _Py_normpath
 #include "pycore_frame.h"        // _PyInterpreterFrame
 #include "pycore_gc.h"           // PyGC_Head
@@ -1530,6 +1531,40 @@ test_pymem_getallocatorsname(PyObject *self, PyObject *args)
     return PyUnicode_FromString(name);
 }
 
+static PyObject *
+get_object_dict_values(PyObject *self, PyObject *obj)
+{
+    PyTypeObject *type = Py_TYPE(obj);
+    if (!_PyType_HasFeature(type, Py_TPFLAGS_MANAGED_DICT)) {
+        Py_RETURN_NONE;
+    }
+    PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(obj);
+    if (!_PyDictOrValues_IsValues(dorv)) {
+        Py_RETURN_NONE;
+    }
+    PyDictValues *values = _PyDictOrValues_GetValues(dorv);
+    PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys;
+    assert(keys != NULL);
+    int size = (int)keys->dk_nentries;
+    assert(size >= 0);
+    PyObject *res = PyTuple_New(size);
+    if (res == NULL) {
+        return NULL;
+    }
+    _Py_DECLARE_STR(anon_null, "<NULL>");
+    for(int i = 0; i < size; i++) {
+        PyObject *item = values->values[i];
+        if (item == NULL) {
+            item = &_Py_STR(anon_null);
+        }
+        else {
+            Py_INCREF(item);
+        }
+        PyTuple_SET_ITEM(res, i, item);
+    }
+    return res;
+}
+
 
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
@@ -1594,6 +1629,7 @@ static PyMethodDef module_functions[] = {
     {"check_pyobject_uninitialized_is_freed",
                               check_pyobject_uninitialized_is_freed, METH_NOARGS},
     {"pymem_getallocatorsname", test_pymem_getallocatorsname, METH_NOARGS},
+    {"get_object_dict_values", get_object_dict_values, METH_O},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 931103cdc1a06..36375f50646fd 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -5464,22 +5464,24 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values)
     return make_dict_from_instance_attributes(interp, keys, values);
 }
 
-// Return 1 if the dict was dematerialized, 0 otherwise.
-int
+// Return true if the dict was dematerialized, false otherwise.
+bool
 _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)
 {
     assert(_PyObject_DictOrValuesPointer(obj) == dorv);
     assert(!_PyDictOrValues_IsValues(*dorv));
     PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv);
     if (dict == NULL) {
-        return 0;
+        return false;
     }
     // It's likely that this dict still shares its keys (if it was materialized
     // on request and not heavily modified):
-    assert(PyDict_CheckExact(dict));
+    if (!PyDict_CheckExact(dict)) {
+        return false;
+    }
     assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE));
     if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) {
-        return 0;
+        return false;
     }
     assert(dict->ma_values);
     // We have an opportunity to do something *really* cool: dematerialize it!
@@ -5490,7 +5492,7 @@ _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)
     dict->ma_keys = NULL;
     dict->ma_values = NULL;
     Py_DECREF(dict);
-    return 1;
+    return true;
 }
 
 int



More information about the Python-checkins mailing list