[Python-checkins] bpo-40116: Add insertion order bit-vector to dict values to allow dicts to share keys more freely. (GH-28520)

markshannon webhook-mailer at python.org
Wed Oct 6 08:20:02 EDT 2021


https://github.com/python/cpython/commit/a7252f88d3fa33036bdd6036b8c97bc785ed6f17
commit: a7252f88d3fa33036bdd6036b8c97bc785ed6f17
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-10-06T13:19:53+01:00
summary:

bpo-40116: Add insertion order bit-vector to dict values to allow dicts to share keys more freely. (GH-28520)

files:
A Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst
M Include/cpython/dictobject.h
M Include/internal/pycore_dict.h
M Lib/test/test_dict.py
M Modules/_testcapimodule.c
M Objects/dictobject.c
M Python/ceval.c
M Tools/gdb/libpython.py

diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index 7c63374c566c7..ba118788f7b1b 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -3,6 +3,7 @@
 #endif
 
 typedef struct _dictkeysobject PyDictKeysObject;
+typedef struct _dictvalues PyDictValues;
 
 /* The ma_values pointer is NULL for a combined table
  * or points to an array of PyObject* for a split table
@@ -24,7 +25,7 @@ typedef struct {
 
        If ma_values is not NULL, the table is splitted:
        keys are stored in ma_keys and values are stored in ma_values */
-    PyObject **ma_values;
+    PyDictValues *ma_values;
 } PyDictObject;
 
 PyAPI_FUNC(PyObject *) _PyDict_GetItem_KnownHash(PyObject *mp, PyObject *key,
diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 2becc30beb4d8..d37ef71bbcad3 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -71,6 +71,14 @@ struct _dictkeysobject {
        see the DK_ENTRIES() macro */
 };
 
+/* This must be no more than 16, for the order vector to fit in 64 bits */
+#define SHARED_KEYS_MAX_SIZE 16
+
+struct _dictvalues {
+    uint64_t mv_order;
+    PyObject *values[1];
+};
+
 #define DK_LOG_SIZE(dk)  ((dk)->dk_log2_size)
 #if SIZEOF_VOID_P > 4
 #define DK_SIZE(dk)      (((int64_t)1)<<DK_LOG_SIZE(dk))
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index 666cd81e68d81..a6ce6f98c8290 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -1020,7 +1020,6 @@ def test_splittable_del(self):
         with self.assertRaises(KeyError):
             del a['y']
 
-        self.assertGreater(sys.getsizeof(a), orig_size)
         self.assertEqual(list(a), ['x', 'z'])
         self.assertEqual(list(b), ['x', 'y', 'z'])
 
@@ -1031,16 +1030,12 @@ def test_splittable_del(self):
 
     @support.cpython_only
     def test_splittable_pop(self):
-        """split table must be combined when d.pop(k)"""
         a, b = self.make_shared_key_dict(2)
 
-        orig_size = sys.getsizeof(a)
-
-        a.pop('y')  # split table is combined
+        a.pop('y')
         with self.assertRaises(KeyError):
             a.pop('y')
 
-        self.assertGreater(sys.getsizeof(a), orig_size)
         self.assertEqual(list(a), ['x', 'z'])
         self.assertEqual(list(b), ['x', 'y', 'z'])
 
@@ -1074,36 +1069,6 @@ def test_splittable_popitem(self):
         self.assertEqual(list(a), ['x', 'y'])
         self.assertEqual(list(b), ['x', 'y', 'z'])
 
-    @support.cpython_only
-    def test_splittable_setattr_after_pop(self):
-        """setattr() must not convert combined table into split table."""
-        # Issue 28147
-        import _testcapi
-
-        class C:
-            pass
-        a = C()
-
-        a.a = 1
-        self.assertTrue(_testcapi.dict_hassplittable(a.__dict__))
-
-        # dict.pop() convert it to combined table
-        a.__dict__.pop('a')
-        self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-
-        # But C should not convert a.__dict__ to split table again.
-        a.a = 1
-        self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-
-        # Same for popitem()
-        a = C()
-        a.a = 2
-        self.assertTrue(_testcapi.dict_hassplittable(a.__dict__))
-        a.__dict__.popitem()
-        self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-        a.a = 3
-        self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-
     def test_iterator_pickling(self):
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             data = {1:"a", 2:"b", 3:"c"}
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst
new file mode 100644
index 0000000000000..24ce96376d87e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst	
@@ -0,0 +1,4 @@
+Change to the implementation of split dictionaries. Classes where the
+instances differ either in the exact set of attributes, or in the order in
+which those attributes are set, can still share keys. This should have no
+observable effect on users of Python or the C-API. Patch by Mark Shannon.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index e3eec0c47f73a..03dc7763bfd05 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -334,19 +334,6 @@ dict_getitem_knownhash(PyObject *self, PyObject *args)
     return result;
 }
 
-static PyObject*
-dict_hassplittable(PyObject *self, PyObject *arg)
-{
-    if (!PyDict_Check(arg)) {
-        PyErr_Format(PyExc_TypeError,
-                     "dict_hassplittable() argument must be dict, not '%s'",
-                     Py_TYPE(arg)->tp_name);
-        return NULL;
-    }
-
-    return PyBool_FromLong(_PyDict_HasSplitTable((PyDictObject*)arg));
-}
-
 /* Issue #4701: Check that PyObject_Hash implicitly calls
  *   PyType_Ready if it hasn't already been called
  */
@@ -5721,7 +5708,6 @@ static PyMethodDef TestMethods[] = {
     {"test_list_api",           test_list_api,                   METH_NOARGS},
     {"test_dict_iteration",     test_dict_iteration,             METH_NOARGS},
     {"dict_getitem_knownhash",  dict_getitem_knownhash,          METH_VARARGS},
-    {"dict_hassplittable",      dict_hassplittable,              METH_O},
     {"test_lazy_hash_inheritance",      test_lazy_hash_inheritance,METH_NOARGS},
     {"test_long_api",           test_long_api,                   METH_NOARGS},
     {"test_xincref_doesnt_leak",test_xincref_doesnt_leak,        METH_NOARGS},
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index ae0098be5b547..824cba949d7a8 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -60,7 +60,6 @@ The DictObject can be in one of two forms.
     ma_values != NULL, dk_refcnt >= 1
     Values are stored in the ma_values array.
     Only string (unicode) keys are allowed.
-    All dicts sharing same key must have same insertion order.
 
 There are four kinds of slots in the table (slot is index, and
 DK_ENTRIES(keys)[index] if index >= 0):
@@ -96,9 +95,10 @@ dk_nentries to achieve amortized O(1).  Since there are DKIX_DUMMY remains in
 dk_indices, we can't increment dk_usable even though dk_nentries is
 decremented.
 
-In split table, inserting into pending entry is allowed only for dk_entries[ix]
-where ix == mp->ma_used. Inserting into other index and deleting item cause
-converting the dict to the combined table.
+To preserve the order in a split table, a bit vector is used  to record the
+insertion order. When a key is inserted the bit vector is shifted up by 4 bits
+and the index of the key is stored in the low 4 bits.
+As a consequence of this, split keys have a maximum size of 16.
 */
 
 /* PyDict_MINSIZE is the starting size for any new dict.
@@ -445,7 +445,9 @@ static PyDictKeysObject empty_keys_struct = {
          DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
 };
 
-static PyObject *empty_values[1] = { NULL };
+
+static PyDictValues empty_values_struct = { 0, { NULL }};
+#define empty_values (&empty_values_struct)
 
 #define Py_EMPTY_KEYS &empty_keys_struct
 
@@ -458,6 +460,13 @@ static PyObject *empty_values[1] = { NULL };
 #  define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 0))
 #endif
 
+static inline int
+get_index_from_order(PyDictObject *mp, Py_ssize_t i)
+{
+    assert(mp->ma_used <= 16);
+    int shift = (int)(mp->ma_used-1-i)*4;
+    return (int)(mp->ma_values->mv_order >> shift) & 15;
+}
 
 int
 _PyDict_CheckConsistency(PyObject *op, int check_content)
@@ -482,17 +491,19 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
         /* combined table */
         CHECK(keys->dk_refcnt == 1);
     }
+    else {
+        CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
+    }
 
     if (check_content) {
         PyDictKeyEntry *entries = DK_ENTRIES(keys);
-        Py_ssize_t i;
 
-        for (i=0; i < DK_SIZE(keys); i++) {
+        for (Py_ssize_t i=0; i < DK_SIZE(keys); i++) {
             Py_ssize_t ix = dictkeys_get_index(keys, i);
             CHECK(DKIX_DUMMY <= ix && ix <= usable);
         }
 
-        for (i=0; i < usable; i++) {
+        for (Py_ssize_t i=0; i < usable; i++) {
             PyDictKeyEntry *entry = &entries[i];
             PyObject *key = entry->me_key;
 
@@ -517,9 +528,14 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
         }
 
         if (splitted) {
+            CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
             /* splitted table */
-            for (i=0; i < mp->ma_used; i++) {
-                CHECK(mp->ma_values[i] != NULL);
+            int duplicate_check = 0;
+            for (Py_ssize_t i=0; i < mp->ma_used; i++) {
+                int index = get_index_from_order(mp, i);
+                CHECK((duplicate_check & (1<<index)) == 0);
+                duplicate_check |= (1<<index);
+                CHECK(mp->ma_values->values[index] != NULL);
             }
         }
     }
@@ -576,9 +592,9 @@ new_keys_object(uint8_t log2_size)
 #endif
     dk->dk_refcnt = 1;
     dk->dk_log2_size = log2_size;
-    dk->dk_usable = usable;
     dk->dk_kind = DICT_KEYS_UNICODE;
     dk->dk_nentries = 0;
+    dk->dk_usable = usable;
     dk->dk_version = 0;
     memset(&dk->dk_indices[0], 0xff, es<<log2_size);
     memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
@@ -606,12 +622,18 @@ free_keys_object(PyDictKeysObject *keys)
     PyObject_Free(keys);
 }
 
-#define new_values(size) PyMem_NEW(PyObject *, size)
+static inline PyDictValues*
+new_values(Py_ssize_t size)
+{
+    Py_ssize_t n = sizeof(PyDictValues) + sizeof(PyObject *) * (size-1);
+    return (PyDictValues*)PyMem_Malloc(n);
+}
+
 #define free_values(values) PyMem_Free(values)
 
 /* Consumes a reference to the keys object */
 static PyObject *
-new_dict(PyDictKeysObject *keys, PyObject **values)
+new_dict(PyDictKeysObject *keys, PyDictValues *values)
 {
     PyDictObject *mp;
     assert(keys != NULL);
@@ -648,7 +670,7 @@ new_dict(PyDictKeysObject *keys, PyObject **values)
 static PyObject *
 new_dict_with_shared_keys(PyDictKeysObject *keys)
 {
-    PyObject **values;
+    PyDictValues *values;
     Py_ssize_t i, size;
 
     size = USABLE_FRACTION(DK_SIZE(keys));
@@ -657,8 +679,9 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
         dictkeys_decref(keys);
         return PyErr_NoMemory();
     }
+    values->mv_order = 0;
     for (i = 0; i < size; i++) {
-        values[i] = NULL;
+        values->values[i] = NULL;
     }
     return new_dict(keys, values);
 }
@@ -829,7 +852,7 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
             *value_addr = NULL;
         }
         else if (kind == DICT_KEYS_SPLIT) {
-            *value_addr = mp->ma_values[ix];
+            *value_addr = mp->ma_values->values[ix];
         }
         else {
             *value_addr = DK_ENTRIES(dk)[ix].me_value;
@@ -879,7 +902,7 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
     Py_UNREACHABLE();
 found:
     if (dk->dk_kind == DICT_KEYS_SPLIT) {
-        *value_addr = mp->ma_values[ix];
+        *value_addr = mp->ma_values->values[ix];
     }
     else {
         *value_addr = ep0[ix].me_value;
@@ -928,7 +951,7 @@ _PyDict_MaybeUntrack(PyObject *op)
     numentries = mp->ma_keys->dk_nentries;
     if (_PyDict_HasSplitTable(mp)) {
         for (i = 0; i < numentries; i++) {
-            if ((value = mp->ma_values[i]) == NULL)
+            if ((value = mp->ma_values->values[i]) == NULL)
                 continue;
             if (_PyObject_GC_MAY_BE_TRACKED(value)) {
                 assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
@@ -998,17 +1021,6 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 
     MAINTAIN_TRACKING(mp, key, value);
 
-    /* When insertion order is different from shared key, we can't share
-     * the key anymore.  Convert this instance to combine table.
-     */
-    if (_PyDict_HasSplitTable(mp) &&
-        ((ix >= 0 && old_value == NULL && mp->ma_used != ix) ||
-         (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
-        if (insertion_resize(mp) < 0)
-            goto Fail;
-        ix = DKIX_EMPTY;
-    }
-
     if (ix == DKIX_EMPTY) {
         /* Insert into new slot. */
         mp->ma_keys->dk_version = 0;
@@ -1027,8 +1039,12 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
         ep->me_key = key;
         ep->me_hash = hash;
         if (mp->ma_values) {
-            assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
-            mp->ma_values[mp->ma_keys->dk_nentries] = value;
+            Py_ssize_t index = mp->ma_keys->dk_nentries;
+            assert(index < SHARED_KEYS_MAX_SIZE);
+            assert((mp->ma_values->mv_order >> 60) == 0);
+            mp->ma_values->mv_order = (mp->ma_values->mv_order)<<4 | index;
+            assert (mp->ma_values->values[index] == NULL);
+            mp->ma_values->values[index] = value;
         }
         else {
             ep->me_value = value;
@@ -1044,10 +1060,9 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 
     if (old_value != value) {
         if (_PyDict_HasSplitTable(mp)) {
-            mp->ma_values[ix] = value;
+            mp->ma_values->values[ix] = value;
             if (old_value == NULL) {
-                /* pending state */
-                assert(ix == mp->ma_used);
+                mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
                 mp->ma_used++;
             }
         }
@@ -1136,7 +1151,7 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize)
 {
     Py_ssize_t numentries;
     PyDictKeysObject *oldkeys;
-    PyObject **oldvalues;
+    PyDictValues *oldvalues;
     PyDictKeyEntry *oldentries, *newentries;
 
     if (log2_newsize >= SIZEOF_SIZE_T*8) {
@@ -1173,13 +1188,14 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize)
          * Note that values of split table is always dense.
          */
         for (Py_ssize_t i = 0; i < numentries; i++) {
-            assert(oldvalues[i] != NULL);
-            PyDictKeyEntry *ep = &oldentries[i];
+            int index = oldvalues->mv_order >> ((numentries-1-i)*4) & 15;
+            assert(oldvalues->values[index] != NULL);
+            PyDictKeyEntry *ep = &oldentries[index];
             PyObject *key = ep->me_key;
             Py_INCREF(key);
             newentries[i].me_key = key;
             newentries[i].me_hash = ep->me_hash;
-            newentries[i].me_value = oldvalues[i];
+            newentries[i].me_value = oldvalues->values[index];
         }
 
         dictkeys_decref(oldkeys);
@@ -1238,9 +1254,12 @@ make_keys_shared(PyObject *op)
 
     if (!PyDict_CheckExact(op))
         return NULL;
+    if (mp->ma_used > SHARED_KEYS_MAX_SIZE) {
+        return NULL;
+    }
     if (!_PyDict_HasSplitTable(mp)) {
         PyDictKeyEntry *ep0;
-        PyObject **values;
+        PyDictValues *values;
         assert(mp->ma_keys->dk_refcnt == 1);
         if (mp->ma_keys->dk_kind == DICT_KEYS_GENERAL) {
             return NULL;
@@ -1260,14 +1279,29 @@ make_keys_shared(PyObject *op)
                 "Not enough memory to allocate new values array");
             return NULL;
         }
-        for (i = 0; i < size; i++) {
-            values[i] = ep0[i].me_value;
+        uint64_t order = 0;
+        for (i = 0; i < mp->ma_used; i++) {
+            order <<= 4;
+            order |= i;
+            assert(ep0[i].me_value != NULL);
+            values->values[i] = ep0[i].me_value;
+            ep0[i].me_value = NULL;
+        }
+        values->mv_order = order;
+        for (; i < size; i++) {
+            assert(ep0[i].me_value == NULL);
+            values->values[i] = NULL;
             ep0[i].me_value = NULL;
         }
+        if (mp->ma_keys->dk_nentries + mp->ma_keys->dk_usable > SHARED_KEYS_MAX_SIZE) {
+            assert(mp->ma_keys->dk_nentries <= SHARED_KEYS_MAX_SIZE);
+            mp->ma_keys->dk_usable = SHARED_KEYS_MAX_SIZE - mp->ma_keys->dk_nentries;
+        }
         mp->ma_keys->dk_kind = DICT_KEYS_SPLIT;
         mp->ma_values = values;
     }
     dictkeys_incref(mp->ma_keys);
+    ASSERT_CONSISTENT(mp);
     return mp->ma_keys;
 }
 
@@ -1366,7 +1400,7 @@ _PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
         if (ep->me_key == key) {
             if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
                 assert(mp->ma_values != NULL);
-                res = mp->ma_values[(size_t)hint];
+                res = mp->ma_values->values[(size_t)hint];
             }
             else {
                 res = ep->me_value;
@@ -1569,11 +1603,30 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
     mp->ma_keys->dk_version = 0;
     mp->ma_version_tag = DICT_NEXT_VERSION();
     ep = &DK_ENTRIES(mp->ma_keys)[ix];
-    dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
-    old_key = ep->me_key;
-    ep->me_key = NULL;
-    ep->me_value = NULL;
-    Py_DECREF(old_key);
+    if (mp->ma_values) {
+        assert(old_value == mp->ma_values->values[ix]);
+        mp->ma_values->values[ix] = NULL;
+        assert(ix < SHARED_KEYS_MAX_SIZE);
+        /* Update order */
+        for (int i = 0;; i+= 4) {
+            assert (i < 64);
+            if (((mp->ma_values->mv_order >> i) & 15) == (uint64_t)ix) {
+                /* Remove 4 bits at ith position */
+                uint64_t order = mp->ma_values->mv_order;
+                uint64_t high = ((order>>i)>>4)<<i;
+                uint64_t low = order & ((((uint64_t)1)<<i)-1);
+                mp->ma_values->mv_order = high | low;
+                break;
+            }
+        }
+    }
+    else {
+        dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+        old_key = ep->me_key;
+        ep->me_key = NULL;
+        ep->me_value = NULL;
+        Py_DECREF(old_key);
+    }
     Py_DECREF(old_value);
 
     ASSERT_CONSISTENT(mp);
@@ -1617,15 +1670,6 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
         return -1;
     }
 
-    // Split table doesn't allow deletion.  Combine it.
-    if (_PyDict_HasSplitTable(mp)) {
-        if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
-            return -1;
-        }
-        ix = _Py_dict_lookup(mp, key, hash, &old_value);
-        assert(ix >= 0);
-    }
-
     return delitem_common(mp, hash, ix, old_value);
 }
 
@@ -1660,15 +1704,6 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key,
         return -1;
     }
 
-    // Split table doesn't allow deletion.  Combine it.
-    if (_PyDict_HasSplitTable(mp)) {
-        if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
-            return -1;
-        }
-        ix = _Py_dict_lookup(mp, key, hash, &old_value);
-        assert(ix >= 0);
-    }
-
     res = predicate(old_value);
     if (res == -1)
         return -1;
@@ -1688,7 +1723,7 @@ PyDict_Clear(PyObject *op)
 {
     PyDictObject *mp;
     PyDictKeysObject *oldkeys;
-    PyObject **oldvalues;
+    PyDictValues *oldvalues;
     Py_ssize_t i, n;
 
     if (!PyDict_Check(op))
@@ -1708,7 +1743,7 @@ PyDict_Clear(PyObject *op)
     if (oldvalues != NULL) {
         n = oldkeys->dk_nentries;
         for (i = 0; i < n; i++)
-            Py_CLEAR(oldvalues[i]);
+            Py_CLEAR(oldvalues->values[i]);
         free_values(oldvalues);
         dictkeys_decref(oldkeys);
     }
@@ -1738,11 +1773,12 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
     mp = (PyDictObject *)op;
     i = *ppos;
     if (mp->ma_values) {
+        assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
         if (i < 0 || i >= mp->ma_used)
             return 0;
-        /* values of split table is always dense */
-        entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
-        value = mp->ma_values[i];
+        int index = get_index_from_order(mp, i);
+        entry_ptr = &DK_ENTRIES(mp->ma_keys)[index];
+        value = mp->ma_values->values[index];
         assert(value != NULL);
     }
     else {
@@ -1796,9 +1832,8 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 PyObject *
 _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
 {
-    Py_ssize_t ix, hashpos;
-    PyObject *old_value, *old_key;
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
+    PyObject *old_value;
     PyDictObject *mp;
 
     assert(PyDict_Check(dict));
@@ -1823,29 +1858,9 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d
         _PyErr_SetKeyError(key);
         return NULL;
     }
-
-    // Split table doesn't allow deletion.  Combine it.
-    if (_PyDict_HasSplitTable(mp)) {
-        if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
-            return NULL;
-        }
-        ix = _Py_dict_lookup(mp, key, hash, &old_value);
-        assert(ix >= 0);
-    }
-
-    hashpos = lookdict_index(mp->ma_keys, hash, ix);
-    assert(hashpos >= 0);
     assert(old_value != NULL);
-    mp->ma_used--;
-    mp->ma_version_tag = DICT_NEXT_VERSION();
-    mp->ma_keys->dk_version = 0;
-    dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
-    ep = &DK_ENTRIES(mp->ma_keys)[ix];
-    mp->ma_keys->dk_version = 0;
-    old_key = ep->me_key;
-    ep->me_key = NULL;
-    ep->me_value = NULL;
-    Py_DECREF(old_key);
+    Py_INCREF(old_value);
+    delitem_common(mp, hash, ix, old_value);
 
     ASSERT_CONSISTENT(mp);
     return old_value;
@@ -1966,7 +1981,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
 static void
 dict_dealloc(PyDictObject *mp)
 {
-    PyObject **values = mp->ma_values;
+    PyDictValues *values = mp->ma_values;
     PyDictKeysObject *keys = mp->ma_keys;
     Py_ssize_t i, n;
 
@@ -1976,7 +1991,7 @@ dict_dealloc(PyDictObject *mp)
     if (values != NULL) {
         if (values != empty_values) {
             for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
-                Py_XDECREF(values[i]);
+                Py_XDECREF(values->values[i]);
             }
             free_values(values);
         }
@@ -2165,7 +2180,7 @@ dict_keys(PyDictObject *mp)
     }
     ep = DK_ENTRIES(mp->ma_keys);
     if (mp->ma_values) {
-        value_ptr = mp->ma_values;
+        value_ptr = mp->ma_values->values;
         offset = sizeof(PyObject *);
     }
     else {
@@ -2208,7 +2223,7 @@ dict_values(PyDictObject *mp)
     }
     ep = DK_ENTRIES(mp->ma_keys);
     if (mp->ma_values) {
-        value_ptr = mp->ma_values;
+        value_ptr = mp->ma_values->values;
         offset = sizeof(PyObject *);
     }
     else {
@@ -2265,7 +2280,7 @@ dict_items(PyDictObject *mp)
     /* Nothing we do below makes any function calls. */
     ep = DK_ENTRIES(mp->ma_keys);
     if (mp->ma_values) {
-        value_ptr = mp->ma_values;
+        value_ptr = mp->ma_values->values;
         offset = sizeof(PyObject *);
     }
     else {
@@ -2534,7 +2549,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
             key = entry->me_key;
             hash = entry->me_hash;
             if (other->ma_values)
-                value = other->ma_values[i];
+                value = other->ma_values->values[i];
             else
                 value = entry->me_value;
 
@@ -2677,7 +2692,7 @@ PyDict_Copy(PyObject *o)
     if (_PyDict_HasSplitTable(mp)) {
         PyDictObject *split_copy;
         Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
-        PyObject **newvalues;
+        PyDictValues *newvalues;
         newvalues = new_values(size);
         if (newvalues == NULL)
             return PyErr_NoMemory();
@@ -2686,15 +2701,16 @@ PyDict_Copy(PyObject *o)
             free_values(newvalues);
             return NULL;
         }
+        newvalues->mv_order = mp->ma_values->mv_order;
         split_copy->ma_values = newvalues;
         split_copy->ma_keys = mp->ma_keys;
         split_copy->ma_used = mp->ma_used;
         split_copy->ma_version_tag = DICT_NEXT_VERSION();
         dictkeys_incref(mp->ma_keys);
         for (i = 0, n = size; i < n; i++) {
-            PyObject *value = mp->ma_values[i];
+            PyObject *value = mp->ma_values->values[i];
             Py_XINCREF(value);
-            split_copy->ma_values[i] = value;
+            split_copy->ma_values->values[i] = value;
         }
         if (_PyObject_GC_IS_TRACKED(mp))
             _PyObject_GC_TRACK(split_copy);
@@ -2806,7 +2822,7 @@ dict_equal(PyDictObject *a, PyDictObject *b)
         PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
         PyObject *aval;
         if (a->ma_values)
-            aval = a->ma_values[i];
+            aval = a->ma_values->values[i];
         else
             aval = ep->me_value;
         if (aval != NULL) {
@@ -2993,8 +3009,11 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
         ep->me_key = key;
         ep->me_hash = hash;
         if (_PyDict_HasSplitTable(mp)) {
-            assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
-            mp->ma_values[mp->ma_keys->dk_nentries] = value;
+            int index = (int)mp->ma_keys->dk_nentries;
+            assert(index < SHARED_KEYS_MAX_SIZE);
+            assert(mp->ma_values->values[index] == NULL);
+            mp->ma_values->values[index] = value;
+            mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | index;
         }
         else {
             ep->me_value = value;
@@ -3011,7 +3030,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
         assert(ix == mp->ma_used);
         Py_INCREF(value);
         MAINTAIN_TRACKING(mp, key, value);
-        mp->ma_values[ix] = value;
+        mp->ma_values->values[ix] = value;
+        mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
         mp->ma_used++;
         mp->ma_version_tag = DICT_NEXT_VERSION();
     }
@@ -3159,7 +3179,7 @@ dict_traverse(PyObject *op, visitproc visit, void *arg)
     else {
         if (mp->ma_values != NULL) {
             for (i = 0; i < n; i++) {
-                Py_VISIT(mp->ma_values[i]);
+                Py_VISIT(mp->ma_values->values[i]);
             }
         }
         else {
@@ -3677,8 +3697,9 @@ dictiter_iternextkey(dictiterobject *di)
     if (d->ma_values) {
         if (i >= d->ma_used)
             goto fail;
-        key = DK_ENTRIES(k)[i].me_key;
-        assert(d->ma_values[i] != NULL);
+        int index = get_index_from_order(d, i);
+        key = DK_ENTRIES(k)[index].me_key;
+        assert(d->ma_values->values[index] != NULL);
     }
     else {
         Py_ssize_t n = k->dk_nentries;
@@ -3764,7 +3785,8 @@ dictiter_iternextvalue(dictiterobject *di)
     if (d->ma_values) {
         if (i >= d->ma_used)
             goto fail;
-        value = d->ma_values[i];
+        int index = get_index_from_order(d, i);
+        value = d->ma_values->values[index];
         assert(value != NULL);
     }
     else {
@@ -3851,8 +3873,9 @@ dictiter_iternextitem(dictiterobject *di)
     if (d->ma_values) {
         if (i >= d->ma_used)
             goto fail;
-        key = DK_ENTRIES(d->ma_keys)[i].me_key;
-        value = d->ma_values[i];
+        int index = get_index_from_order(d, i);
+        key = DK_ENTRIES(d->ma_keys)[index].me_key;
+        value = d->ma_values->values[index];
         assert(value != NULL);
     }
     else {
@@ -3968,8 +3991,9 @@ dictreviter_iternext(dictiterobject *di)
         goto fail;
     }
     if (d->ma_values) {
-        key = DK_ENTRIES(k)[i].me_key;
-        value = d->ma_values[i];
+        int index = get_index_from_order(d, i);
+        key = DK_ENTRIES(k)[index].me_key;
+        value = d->ma_values->values[index];
         assert (value != NULL);
     }
     else {
@@ -4976,19 +5000,14 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
         }
         if (value == NULL) {
             res = PyDict_DelItem(dict, key);
-            // Since key sharing dict doesn't allow deletion, PyDict_DelItem()
-            // always converts dict to combined form.
-            if ((cached = CACHED_KEYS(tp)) != NULL) {
-                CACHED_KEYS(tp) = NULL;
-                dictkeys_decref(cached);
-            }
         }
         else {
             int was_shared = (cached == ((PyDictObject *)dict)->ma_keys);
             res = PyDict_SetItem(dict, key, value);
             if (was_shared &&
                     (cached = CACHED_KEYS(tp)) != NULL &&
-                    cached != ((PyDictObject *)dict)->ma_keys) {
+                    cached != ((PyDictObject *)dict)->ma_keys &&
+                    cached->dk_nentries <= SHARED_KEYS_MAX_SIZE) {
                 /* PyDict_SetItem() may call dictresize and convert split table
                  * into combined table.  In such case, convert it to split
                  * table again and update type's shared key only when this is
@@ -5004,14 +5023,15 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
                  *     a = C()
                  */
                 if (cached->dk_refcnt == 1) {
-                    CACHED_KEYS(tp) = make_keys_shared(dict);
-                }
-                else {
-                    CACHED_KEYS(tp) = NULL;
+                    PyDictKeysObject *new_cached = make_keys_shared(dict);
+                    if (new_cached != NULL) {
+                        CACHED_KEYS(tp) = new_cached;
+                        dictkeys_decref(cached);
+                    }
+                    else if (PyErr_Occurred()) {
+                        return -1;
+                    }
                 }
-                dictkeys_decref(cached);
-                if (CACHED_KEYS(tp) == NULL && PyErr_Occurred())
-                    return -1;
             }
         }
     } else {
@@ -5028,6 +5048,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
             res = PyDict_SetItem(dict, key, value);
         }
     }
+    ASSERT_CONSISTENT(dict);
     return res;
 }
 
diff --git a/Python/ceval.c b/Python/ceval.c
index a3a173dfb7013..e39ec67614bf5 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3616,7 +3616,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             DEOPT_IF(dict == NULL, LOAD_ATTR);
             assert(PyDict_CheckExact((PyObject *)dict));
             DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR);
-            res = dict->ma_values[cache0->index];
+            res = dict->ma_values->values[cache0->index];
             DEOPT_IF(res == NULL, LOAD_ATTR);
             STAT_INC(LOAD_ATTR, hit);
             record_cache_hit(cache0);
@@ -3722,15 +3722,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             DEOPT_IF(dict == NULL, STORE_ATTR);
             assert(PyDict_CheckExact((PyObject *)dict));
             DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, STORE_ATTR);
-            /* Need to maintain ordering of dicts */
-            DEOPT_IF(cache0->index > 0 && dict->ma_values[cache0->index-1] == NULL, STORE_ATTR);
             STAT_INC(STORE_ATTR, hit);
             record_cache_hit(cache0);
+            int index = cache0->index;
             STACK_SHRINK(1);
             PyObject *value = POP();
-            PyObject *old_value = dict->ma_values[cache0->index];
-            dict->ma_values[cache0->index] = value;
+            PyObject *old_value = dict->ma_values->values[index];
+            dict->ma_values->values[index] = value;
             if (old_value == NULL) {
+                assert(index < 16);
+                dict->ma_values->mv_order = (dict->ma_values->mv_order << 4) | index;
                 dict->ma_used++;
             }
             else {
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index c11b23e74b9be..62eb1976b715f 100755
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -686,10 +686,13 @@ def iteritems(self):
         '''
         keys = self.field('ma_keys')
         values = self.field('ma_values')
+        has_values = long(values)
+        if has_values:
+            values = values['values']
         entries, nentries = self._get_entries(keys)
         for i in safe_range(nentries):
             ep = entries[i]
-            if long(values):
+            if has_values:
                 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
             else:
                 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])



More information about the Python-checkins mailing list