[Python-checkins] bpo-40116: Add insertion order bit-vector to dict values to allow dicts to share keys more freely. (GH-28520)
markshannon
webhook-mailer at python.org
Wed Oct 6 08:20:02 EDT 2021
https://github.com/python/cpython/commit/a7252f88d3fa33036bdd6036b8c97bc785ed6f17
commit: a7252f88d3fa33036bdd6036b8c97bc785ed6f17
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-10-06T13:19:53+01:00
summary:
bpo-40116: Add insertion order bit-vector to dict values to allow dicts to share keys more freely. (GH-28520)
files:
A Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst
M Include/cpython/dictobject.h
M Include/internal/pycore_dict.h
M Lib/test/test_dict.py
M Modules/_testcapimodule.c
M Objects/dictobject.c
M Python/ceval.c
M Tools/gdb/libpython.py
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index 7c63374c566c7..ba118788f7b1b 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -3,6 +3,7 @@
#endif
typedef struct _dictkeysobject PyDictKeysObject;
+typedef struct _dictvalues PyDictValues;
/* The ma_values pointer is NULL for a combined table
* or points to an array of PyObject* for a split table
@@ -24,7 +25,7 @@ typedef struct {
If ma_values is not NULL, the table is splitted:
keys are stored in ma_keys and values are stored in ma_values */
- PyObject **ma_values;
+ PyDictValues *ma_values;
} PyDictObject;
PyAPI_FUNC(PyObject *) _PyDict_GetItem_KnownHash(PyObject *mp, PyObject *key,
diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 2becc30beb4d8..d37ef71bbcad3 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -71,6 +71,14 @@ struct _dictkeysobject {
see the DK_ENTRIES() macro */
};
+/* This must be no more than 16, for the order vector to fit in 64 bits */
+#define SHARED_KEYS_MAX_SIZE 16
+
+struct _dictvalues {
+ uint64_t mv_order;
+ PyObject *values[1];
+};
+
#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size)
#if SIZEOF_VOID_P > 4
#define DK_SIZE(dk) (((int64_t)1)<<DK_LOG_SIZE(dk))
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index 666cd81e68d81..a6ce6f98c8290 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -1020,7 +1020,6 @@ def test_splittable_del(self):
with self.assertRaises(KeyError):
del a['y']
- self.assertGreater(sys.getsizeof(a), orig_size)
self.assertEqual(list(a), ['x', 'z'])
self.assertEqual(list(b), ['x', 'y', 'z'])
@@ -1031,16 +1030,12 @@ def test_splittable_del(self):
@support.cpython_only
def test_splittable_pop(self):
- """split table must be combined when d.pop(k)"""
a, b = self.make_shared_key_dict(2)
- orig_size = sys.getsizeof(a)
-
- a.pop('y') # split table is combined
+ a.pop('y')
with self.assertRaises(KeyError):
a.pop('y')
- self.assertGreater(sys.getsizeof(a), orig_size)
self.assertEqual(list(a), ['x', 'z'])
self.assertEqual(list(b), ['x', 'y', 'z'])
@@ -1074,36 +1069,6 @@ def test_splittable_popitem(self):
self.assertEqual(list(a), ['x', 'y'])
self.assertEqual(list(b), ['x', 'y', 'z'])
- @support.cpython_only
- def test_splittable_setattr_after_pop(self):
- """setattr() must not convert combined table into split table."""
- # Issue 28147
- import _testcapi
-
- class C:
- pass
- a = C()
-
- a.a = 1
- self.assertTrue(_testcapi.dict_hassplittable(a.__dict__))
-
- # dict.pop() convert it to combined table
- a.__dict__.pop('a')
- self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-
- # But C should not convert a.__dict__ to split table again.
- a.a = 1
- self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-
- # Same for popitem()
- a = C()
- a.a = 2
- self.assertTrue(_testcapi.dict_hassplittable(a.__dict__))
- a.__dict__.popitem()
- self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
- a.a = 3
- self.assertFalse(_testcapi.dict_hassplittable(a.__dict__))
-
def test_iterator_pickling(self):
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
data = {1:"a", 2:"b", 3:"c"}
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst
new file mode 100644
index 0000000000000..24ce96376d87e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-09-23-14-00-05.bpo-40116.KaoeFs.rst
@@ -0,0 +1,4 @@
+Change to the implementation of split dictionaries. Classes where the
+instances differ either in the exact set of attributes, or in the order in
+which those attributes are set, can still share keys. This should have no
+observable effect on users of Python or the C-API. Patch by Mark Shannon.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index e3eec0c47f73a..03dc7763bfd05 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -334,19 +334,6 @@ dict_getitem_knownhash(PyObject *self, PyObject *args)
return result;
}
-static PyObject*
-dict_hassplittable(PyObject *self, PyObject *arg)
-{
- if (!PyDict_Check(arg)) {
- PyErr_Format(PyExc_TypeError,
- "dict_hassplittable() argument must be dict, not '%s'",
- Py_TYPE(arg)->tp_name);
- return NULL;
- }
-
- return PyBool_FromLong(_PyDict_HasSplitTable((PyDictObject*)arg));
-}
-
/* Issue #4701: Check that PyObject_Hash implicitly calls
* PyType_Ready if it hasn't already been called
*/
@@ -5721,7 +5708,6 @@ static PyMethodDef TestMethods[] = {
{"test_list_api", test_list_api, METH_NOARGS},
{"test_dict_iteration", test_dict_iteration, METH_NOARGS},
{"dict_getitem_knownhash", dict_getitem_knownhash, METH_VARARGS},
- {"dict_hassplittable", dict_hassplittable, METH_O},
{"test_lazy_hash_inheritance", test_lazy_hash_inheritance,METH_NOARGS},
{"test_long_api", test_long_api, METH_NOARGS},
{"test_xincref_doesnt_leak",test_xincref_doesnt_leak, METH_NOARGS},
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index ae0098be5b547..824cba949d7a8 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -60,7 +60,6 @@ The DictObject can be in one of two forms.
ma_values != NULL, dk_refcnt >= 1
Values are stored in the ma_values array.
Only string (unicode) keys are allowed.
- All dicts sharing same key must have same insertion order.
There are four kinds of slots in the table (slot is index, and
DK_ENTRIES(keys)[index] if index >= 0):
@@ -96,9 +95,10 @@ dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
dk_indices, we can't increment dk_usable even though dk_nentries is
decremented.
-In split table, inserting into pending entry is allowed only for dk_entries[ix]
-where ix == mp->ma_used. Inserting into other index and deleting item cause
-converting the dict to the combined table.
+To preserve the order in a split table, a bit vector is used to record the
+insertion order. When a key is inserted the bit vector is shifted up by 4 bits
+and the index of the key is stored in the low 4 bits.
+As a consequence of this, split keys have a maximum size of 16.
*/
/* PyDict_MINSIZE is the starting size for any new dict.
@@ -445,7 +445,9 @@ static PyDictKeysObject empty_keys_struct = {
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
};
-static PyObject *empty_values[1] = { NULL };
+
+static PyDictValues empty_values_struct = { 0, { NULL }};
+#define empty_values (&empty_values_struct)
#define Py_EMPTY_KEYS &empty_keys_struct
@@ -458,6 +460,13 @@ static PyObject *empty_values[1] = { NULL };
# define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 0))
#endif
+static inline int
+get_index_from_order(PyDictObject *mp, Py_ssize_t i)
+{
+ assert(mp->ma_used <= 16);
+ int shift = (int)(mp->ma_used-1-i)*4;
+ return (int)(mp->ma_values->mv_order >> shift) & 15;
+}
int
_PyDict_CheckConsistency(PyObject *op, int check_content)
@@ -482,17 +491,19 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
/* combined table */
CHECK(keys->dk_refcnt == 1);
}
+ else {
+ CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
+ }
if (check_content) {
PyDictKeyEntry *entries = DK_ENTRIES(keys);
- Py_ssize_t i;
- for (i=0; i < DK_SIZE(keys); i++) {
+ for (Py_ssize_t i=0; i < DK_SIZE(keys); i++) {
Py_ssize_t ix = dictkeys_get_index(keys, i);
CHECK(DKIX_DUMMY <= ix && ix <= usable);
}
- for (i=0; i < usable; i++) {
+ for (Py_ssize_t i=0; i < usable; i++) {
PyDictKeyEntry *entry = &entries[i];
PyObject *key = entry->me_key;
@@ -517,9 +528,14 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
}
if (splitted) {
+ CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
/* splitted table */
- for (i=0; i < mp->ma_used; i++) {
- CHECK(mp->ma_values[i] != NULL);
+ int duplicate_check = 0;
+ for (Py_ssize_t i=0; i < mp->ma_used; i++) {
+ int index = get_index_from_order(mp, i);
+ CHECK((duplicate_check & (1<<index)) == 0);
+ duplicate_check |= (1<<index);
+ CHECK(mp->ma_values->values[index] != NULL);
}
}
}
@@ -576,9 +592,9 @@ new_keys_object(uint8_t log2_size)
#endif
dk->dk_refcnt = 1;
dk->dk_log2_size = log2_size;
- dk->dk_usable = usable;
dk->dk_kind = DICT_KEYS_UNICODE;
dk->dk_nentries = 0;
+ dk->dk_usable = usable;
dk->dk_version = 0;
memset(&dk->dk_indices[0], 0xff, es<<log2_size);
memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
@@ -606,12 +622,18 @@ free_keys_object(PyDictKeysObject *keys)
PyObject_Free(keys);
}
-#define new_values(size) PyMem_NEW(PyObject *, size)
+static inline PyDictValues*
+new_values(Py_ssize_t size)
+{
+ Py_ssize_t n = sizeof(PyDictValues) + sizeof(PyObject *) * (size-1);
+ return (PyDictValues*)PyMem_Malloc(n);
+}
+
#define free_values(values) PyMem_Free(values)
/* Consumes a reference to the keys object */
static PyObject *
-new_dict(PyDictKeysObject *keys, PyObject **values)
+new_dict(PyDictKeysObject *keys, PyDictValues *values)
{
PyDictObject *mp;
assert(keys != NULL);
@@ -648,7 +670,7 @@ new_dict(PyDictKeysObject *keys, PyObject **values)
static PyObject *
new_dict_with_shared_keys(PyDictKeysObject *keys)
{
- PyObject **values;
+ PyDictValues *values;
Py_ssize_t i, size;
size = USABLE_FRACTION(DK_SIZE(keys));
@@ -657,8 +679,9 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
dictkeys_decref(keys);
return PyErr_NoMemory();
}
+ values->mv_order = 0;
for (i = 0; i < size; i++) {
- values[i] = NULL;
+ values->values[i] = NULL;
}
return new_dict(keys, values);
}
@@ -829,7 +852,7 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
*value_addr = NULL;
}
else if (kind == DICT_KEYS_SPLIT) {
- *value_addr = mp->ma_values[ix];
+ *value_addr = mp->ma_values->values[ix];
}
else {
*value_addr = DK_ENTRIES(dk)[ix].me_value;
@@ -879,7 +902,7 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
Py_UNREACHABLE();
found:
if (dk->dk_kind == DICT_KEYS_SPLIT) {
- *value_addr = mp->ma_values[ix];
+ *value_addr = mp->ma_values->values[ix];
}
else {
*value_addr = ep0[ix].me_value;
@@ -928,7 +951,7 @@ _PyDict_MaybeUntrack(PyObject *op)
numentries = mp->ma_keys->dk_nentries;
if (_PyDict_HasSplitTable(mp)) {
for (i = 0; i < numentries; i++) {
- if ((value = mp->ma_values[i]) == NULL)
+ if ((value = mp->ma_values->values[i]) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value)) {
assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
@@ -998,17 +1021,6 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
MAINTAIN_TRACKING(mp, key, value);
- /* When insertion order is different from shared key, we can't share
- * the key anymore. Convert this instance to combine table.
- */
- if (_PyDict_HasSplitTable(mp) &&
- ((ix >= 0 && old_value == NULL && mp->ma_used != ix) ||
- (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
- if (insertion_resize(mp) < 0)
- goto Fail;
- ix = DKIX_EMPTY;
- }
-
if (ix == DKIX_EMPTY) {
/* Insert into new slot. */
mp->ma_keys->dk_version = 0;
@@ -1027,8 +1039,12 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
ep->me_key = key;
ep->me_hash = hash;
if (mp->ma_values) {
- assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
- mp->ma_values[mp->ma_keys->dk_nentries] = value;
+ Py_ssize_t index = mp->ma_keys->dk_nentries;
+ assert(index < SHARED_KEYS_MAX_SIZE);
+ assert((mp->ma_values->mv_order >> 60) == 0);
+ mp->ma_values->mv_order = (mp->ma_values->mv_order)<<4 | index;
+ assert (mp->ma_values->values[index] == NULL);
+ mp->ma_values->values[index] = value;
}
else {
ep->me_value = value;
@@ -1044,10 +1060,9 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
if (old_value != value) {
if (_PyDict_HasSplitTable(mp)) {
- mp->ma_values[ix] = value;
+ mp->ma_values->values[ix] = value;
if (old_value == NULL) {
- /* pending state */
- assert(ix == mp->ma_used);
+ mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
mp->ma_used++;
}
}
@@ -1136,7 +1151,7 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize)
{
Py_ssize_t numentries;
PyDictKeysObject *oldkeys;
- PyObject **oldvalues;
+ PyDictValues *oldvalues;
PyDictKeyEntry *oldentries, *newentries;
if (log2_newsize >= SIZEOF_SIZE_T*8) {
@@ -1173,13 +1188,14 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize)
* Note that values of split table is always dense.
*/
for (Py_ssize_t i = 0; i < numentries; i++) {
- assert(oldvalues[i] != NULL);
- PyDictKeyEntry *ep = &oldentries[i];
+ int index = oldvalues->mv_order >> ((numentries-1-i)*4) & 15;
+ assert(oldvalues->values[index] != NULL);
+ PyDictKeyEntry *ep = &oldentries[index];
PyObject *key = ep->me_key;
Py_INCREF(key);
newentries[i].me_key = key;
newentries[i].me_hash = ep->me_hash;
- newentries[i].me_value = oldvalues[i];
+ newentries[i].me_value = oldvalues->values[index];
}
dictkeys_decref(oldkeys);
@@ -1238,9 +1254,12 @@ make_keys_shared(PyObject *op)
if (!PyDict_CheckExact(op))
return NULL;
+ if (mp->ma_used > SHARED_KEYS_MAX_SIZE) {
+ return NULL;
+ }
if (!_PyDict_HasSplitTable(mp)) {
PyDictKeyEntry *ep0;
- PyObject **values;
+ PyDictValues *values;
assert(mp->ma_keys->dk_refcnt == 1);
if (mp->ma_keys->dk_kind == DICT_KEYS_GENERAL) {
return NULL;
@@ -1260,14 +1279,29 @@ make_keys_shared(PyObject *op)
"Not enough memory to allocate new values array");
return NULL;
}
- for (i = 0; i < size; i++) {
- values[i] = ep0[i].me_value;
+ uint64_t order = 0;
+ for (i = 0; i < mp->ma_used; i++) {
+ order <<= 4;
+ order |= i;
+ assert(ep0[i].me_value != NULL);
+ values->values[i] = ep0[i].me_value;
+ ep0[i].me_value = NULL;
+ }
+ values->mv_order = order;
+ for (; i < size; i++) {
+ assert(ep0[i].me_value == NULL);
+ values->values[i] = NULL;
ep0[i].me_value = NULL;
}
+ if (mp->ma_keys->dk_nentries + mp->ma_keys->dk_usable > SHARED_KEYS_MAX_SIZE) {
+ assert(mp->ma_keys->dk_nentries <= SHARED_KEYS_MAX_SIZE);
+ mp->ma_keys->dk_usable = SHARED_KEYS_MAX_SIZE - mp->ma_keys->dk_nentries;
+ }
mp->ma_keys->dk_kind = DICT_KEYS_SPLIT;
mp->ma_values = values;
}
dictkeys_incref(mp->ma_keys);
+ ASSERT_CONSISTENT(mp);
return mp->ma_keys;
}
@@ -1366,7 +1400,7 @@ _PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
if (ep->me_key == key) {
if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
assert(mp->ma_values != NULL);
- res = mp->ma_values[(size_t)hint];
+ res = mp->ma_values->values[(size_t)hint];
}
else {
res = ep->me_value;
@@ -1569,11 +1603,30 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
mp->ma_keys->dk_version = 0;
mp->ma_version_tag = DICT_NEXT_VERSION();
ep = &DK_ENTRIES(mp->ma_keys)[ix];
- dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
- old_key = ep->me_key;
- ep->me_key = NULL;
- ep->me_value = NULL;
- Py_DECREF(old_key);
+ if (mp->ma_values) {
+ assert(old_value == mp->ma_values->values[ix]);
+ mp->ma_values->values[ix] = NULL;
+ assert(ix < SHARED_KEYS_MAX_SIZE);
+ /* Update order */
+ for (int i = 0;; i+= 4) {
+ assert (i < 64);
+ if (((mp->ma_values->mv_order >> i) & 15) == (uint64_t)ix) {
+ /* Remove 4 bits at ith position */
+ uint64_t order = mp->ma_values->mv_order;
+ uint64_t high = ((order>>i)>>4)<<i;
+ uint64_t low = order & ((((uint64_t)1)<<i)-1);
+ mp->ma_values->mv_order = high | low;
+ break;
+ }
+ }
+ }
+ else {
+ dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+ old_key = ep->me_key;
+ ep->me_key = NULL;
+ ep->me_value = NULL;
+ Py_DECREF(old_key);
+ }
Py_DECREF(old_value);
ASSERT_CONSISTENT(mp);
@@ -1617,15 +1670,6 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
return -1;
}
- // Split table doesn't allow deletion. Combine it.
- if (_PyDict_HasSplitTable(mp)) {
- if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
- return -1;
- }
- ix = _Py_dict_lookup(mp, key, hash, &old_value);
- assert(ix >= 0);
- }
-
return delitem_common(mp, hash, ix, old_value);
}
@@ -1660,15 +1704,6 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key,
return -1;
}
- // Split table doesn't allow deletion. Combine it.
- if (_PyDict_HasSplitTable(mp)) {
- if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
- return -1;
- }
- ix = _Py_dict_lookup(mp, key, hash, &old_value);
- assert(ix >= 0);
- }
-
res = predicate(old_value);
if (res == -1)
return -1;
@@ -1688,7 +1723,7 @@ PyDict_Clear(PyObject *op)
{
PyDictObject *mp;
PyDictKeysObject *oldkeys;
- PyObject **oldvalues;
+ PyDictValues *oldvalues;
Py_ssize_t i, n;
if (!PyDict_Check(op))
@@ -1708,7 +1743,7 @@ PyDict_Clear(PyObject *op)
if (oldvalues != NULL) {
n = oldkeys->dk_nentries;
for (i = 0; i < n; i++)
- Py_CLEAR(oldvalues[i]);
+ Py_CLEAR(oldvalues->values[i]);
free_values(oldvalues);
dictkeys_decref(oldkeys);
}
@@ -1738,11 +1773,12 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
mp = (PyDictObject *)op;
i = *ppos;
if (mp->ma_values) {
+ assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
if (i < 0 || i >= mp->ma_used)
return 0;
- /* values of split table is always dense */
- entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
- value = mp->ma_values[i];
+ int index = get_index_from_order(mp, i);
+ entry_ptr = &DK_ENTRIES(mp->ma_keys)[index];
+ value = mp->ma_values->values[index];
assert(value != NULL);
}
else {
@@ -1796,9 +1832,8 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
PyObject *
_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
{
- Py_ssize_t ix, hashpos;
- PyObject *old_value, *old_key;
- PyDictKeyEntry *ep;
+ Py_ssize_t ix;
+ PyObject *old_value;
PyDictObject *mp;
assert(PyDict_Check(dict));
@@ -1823,29 +1858,9 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d
_PyErr_SetKeyError(key);
return NULL;
}
-
- // Split table doesn't allow deletion. Combine it.
- if (_PyDict_HasSplitTable(mp)) {
- if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
- return NULL;
- }
- ix = _Py_dict_lookup(mp, key, hash, &old_value);
- assert(ix >= 0);
- }
-
- hashpos = lookdict_index(mp->ma_keys, hash, ix);
- assert(hashpos >= 0);
assert(old_value != NULL);
- mp->ma_used--;
- mp->ma_version_tag = DICT_NEXT_VERSION();
- mp->ma_keys->dk_version = 0;
- dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
- ep = &DK_ENTRIES(mp->ma_keys)[ix];
- mp->ma_keys->dk_version = 0;
- old_key = ep->me_key;
- ep->me_key = NULL;
- ep->me_value = NULL;
- Py_DECREF(old_key);
+ Py_INCREF(old_value);
+ delitem_common(mp, hash, ix, old_value);
ASSERT_CONSISTENT(mp);
return old_value;
@@ -1966,7 +1981,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
static void
dict_dealloc(PyDictObject *mp)
{
- PyObject **values = mp->ma_values;
+ PyDictValues *values = mp->ma_values;
PyDictKeysObject *keys = mp->ma_keys;
Py_ssize_t i, n;
@@ -1976,7 +1991,7 @@ dict_dealloc(PyDictObject *mp)
if (values != NULL) {
if (values != empty_values) {
for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
- Py_XDECREF(values[i]);
+ Py_XDECREF(values->values[i]);
}
free_values(values);
}
@@ -2165,7 +2180,7 @@ dict_keys(PyDictObject *mp)
}
ep = DK_ENTRIES(mp->ma_keys);
if (mp->ma_values) {
- value_ptr = mp->ma_values;
+ value_ptr = mp->ma_values->values;
offset = sizeof(PyObject *);
}
else {
@@ -2208,7 +2223,7 @@ dict_values(PyDictObject *mp)
}
ep = DK_ENTRIES(mp->ma_keys);
if (mp->ma_values) {
- value_ptr = mp->ma_values;
+ value_ptr = mp->ma_values->values;
offset = sizeof(PyObject *);
}
else {
@@ -2265,7 +2280,7 @@ dict_items(PyDictObject *mp)
/* Nothing we do below makes any function calls. */
ep = DK_ENTRIES(mp->ma_keys);
if (mp->ma_values) {
- value_ptr = mp->ma_values;
+ value_ptr = mp->ma_values->values;
offset = sizeof(PyObject *);
}
else {
@@ -2534,7 +2549,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
key = entry->me_key;
hash = entry->me_hash;
if (other->ma_values)
- value = other->ma_values[i];
+ value = other->ma_values->values[i];
else
value = entry->me_value;
@@ -2677,7 +2692,7 @@ PyDict_Copy(PyObject *o)
if (_PyDict_HasSplitTable(mp)) {
PyDictObject *split_copy;
Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
- PyObject **newvalues;
+ PyDictValues *newvalues;
newvalues = new_values(size);
if (newvalues == NULL)
return PyErr_NoMemory();
@@ -2686,15 +2701,16 @@ PyDict_Copy(PyObject *o)
free_values(newvalues);
return NULL;
}
+ newvalues->mv_order = mp->ma_values->mv_order;
split_copy->ma_values = newvalues;
split_copy->ma_keys = mp->ma_keys;
split_copy->ma_used = mp->ma_used;
split_copy->ma_version_tag = DICT_NEXT_VERSION();
dictkeys_incref(mp->ma_keys);
for (i = 0, n = size; i < n; i++) {
- PyObject *value = mp->ma_values[i];
+ PyObject *value = mp->ma_values->values[i];
Py_XINCREF(value);
- split_copy->ma_values[i] = value;
+ split_copy->ma_values->values[i] = value;
}
if (_PyObject_GC_IS_TRACKED(mp))
_PyObject_GC_TRACK(split_copy);
@@ -2806,7 +2822,7 @@ dict_equal(PyDictObject *a, PyDictObject *b)
PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
PyObject *aval;
if (a->ma_values)
- aval = a->ma_values[i];
+ aval = a->ma_values->values[i];
else
aval = ep->me_value;
if (aval != NULL) {
@@ -2993,8 +3009,11 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
ep->me_key = key;
ep->me_hash = hash;
if (_PyDict_HasSplitTable(mp)) {
- assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
- mp->ma_values[mp->ma_keys->dk_nentries] = value;
+ int index = (int)mp->ma_keys->dk_nentries;
+ assert(index < SHARED_KEYS_MAX_SIZE);
+ assert(mp->ma_values->values[index] == NULL);
+ mp->ma_values->values[index] = value;
+ mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | index;
}
else {
ep->me_value = value;
@@ -3011,7 +3030,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
assert(ix == mp->ma_used);
Py_INCREF(value);
MAINTAIN_TRACKING(mp, key, value);
- mp->ma_values[ix] = value;
+ mp->ma_values->values[ix] = value;
+ mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
mp->ma_used++;
mp->ma_version_tag = DICT_NEXT_VERSION();
}
@@ -3159,7 +3179,7 @@ dict_traverse(PyObject *op, visitproc visit, void *arg)
else {
if (mp->ma_values != NULL) {
for (i = 0; i < n; i++) {
- Py_VISIT(mp->ma_values[i]);
+ Py_VISIT(mp->ma_values->values[i]);
}
}
else {
@@ -3677,8 +3697,9 @@ dictiter_iternextkey(dictiterobject *di)
if (d->ma_values) {
if (i >= d->ma_used)
goto fail;
- key = DK_ENTRIES(k)[i].me_key;
- assert(d->ma_values[i] != NULL);
+ int index = get_index_from_order(d, i);
+ key = DK_ENTRIES(k)[index].me_key;
+ assert(d->ma_values->values[index] != NULL);
}
else {
Py_ssize_t n = k->dk_nentries;
@@ -3764,7 +3785,8 @@ dictiter_iternextvalue(dictiterobject *di)
if (d->ma_values) {
if (i >= d->ma_used)
goto fail;
- value = d->ma_values[i];
+ int index = get_index_from_order(d, i);
+ value = d->ma_values->values[index];
assert(value != NULL);
}
else {
@@ -3851,8 +3873,9 @@ dictiter_iternextitem(dictiterobject *di)
if (d->ma_values) {
if (i >= d->ma_used)
goto fail;
- key = DK_ENTRIES(d->ma_keys)[i].me_key;
- value = d->ma_values[i];
+ int index = get_index_from_order(d, i);
+ key = DK_ENTRIES(d->ma_keys)[index].me_key;
+ value = d->ma_values->values[index];
assert(value != NULL);
}
else {
@@ -3968,8 +3991,9 @@ dictreviter_iternext(dictiterobject *di)
goto fail;
}
if (d->ma_values) {
- key = DK_ENTRIES(k)[i].me_key;
- value = d->ma_values[i];
+ int index = get_index_from_order(d, i);
+ key = DK_ENTRIES(k)[index].me_key;
+ value = d->ma_values->values[index];
assert (value != NULL);
}
else {
@@ -4976,19 +5000,14 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
}
if (value == NULL) {
res = PyDict_DelItem(dict, key);
- // Since key sharing dict doesn't allow deletion, PyDict_DelItem()
- // always converts dict to combined form.
- if ((cached = CACHED_KEYS(tp)) != NULL) {
- CACHED_KEYS(tp) = NULL;
- dictkeys_decref(cached);
- }
}
else {
int was_shared = (cached == ((PyDictObject *)dict)->ma_keys);
res = PyDict_SetItem(dict, key, value);
if (was_shared &&
(cached = CACHED_KEYS(tp)) != NULL &&
- cached != ((PyDictObject *)dict)->ma_keys) {
+ cached != ((PyDictObject *)dict)->ma_keys &&
+ cached->dk_nentries <= SHARED_KEYS_MAX_SIZE) {
/* PyDict_SetItem() may call dictresize and convert split table
* into combined table. In such case, convert it to split
* table again and update type's shared key only when this is
@@ -5004,14 +5023,15 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
* a = C()
*/
if (cached->dk_refcnt == 1) {
- CACHED_KEYS(tp) = make_keys_shared(dict);
- }
- else {
- CACHED_KEYS(tp) = NULL;
+ PyDictKeysObject *new_cached = make_keys_shared(dict);
+ if (new_cached != NULL) {
+ CACHED_KEYS(tp) = new_cached;
+ dictkeys_decref(cached);
+ }
+ else if (PyErr_Occurred()) {
+ return -1;
+ }
}
- dictkeys_decref(cached);
- if (CACHED_KEYS(tp) == NULL && PyErr_Occurred())
- return -1;
}
}
} else {
@@ -5028,6 +5048,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
res = PyDict_SetItem(dict, key, value);
}
}
+ ASSERT_CONSISTENT(dict);
return res;
}
diff --git a/Python/ceval.c b/Python/ceval.c
index a3a173dfb7013..e39ec67614bf5 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3616,7 +3616,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(dict == NULL, LOAD_ATTR);
assert(PyDict_CheckExact((PyObject *)dict));
DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR);
- res = dict->ma_values[cache0->index];
+ res = dict->ma_values->values[cache0->index];
DEOPT_IF(res == NULL, LOAD_ATTR);
STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
@@ -3722,15 +3722,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(dict == NULL, STORE_ATTR);
assert(PyDict_CheckExact((PyObject *)dict));
DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, STORE_ATTR);
- /* Need to maintain ordering of dicts */
- DEOPT_IF(cache0->index > 0 && dict->ma_values[cache0->index-1] == NULL, STORE_ATTR);
STAT_INC(STORE_ATTR, hit);
record_cache_hit(cache0);
+ int index = cache0->index;
STACK_SHRINK(1);
PyObject *value = POP();
- PyObject *old_value = dict->ma_values[cache0->index];
- dict->ma_values[cache0->index] = value;
+ PyObject *old_value = dict->ma_values->values[index];
+ dict->ma_values->values[index] = value;
if (old_value == NULL) {
+ assert(index < 16);
+ dict->ma_values->mv_order = (dict->ma_values->mv_order << 4) | index;
dict->ma_used++;
}
else {
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index c11b23e74b9be..62eb1976b715f 100755
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -686,10 +686,13 @@ def iteritems(self):
'''
keys = self.field('ma_keys')
values = self.field('ma_values')
+ has_values = long(values)
+ if has_values:
+ values = values['values']
entries, nentries = self._get_entries(keys)
for i in safe_range(nentries):
ep = entries[i]
- if long(values):
+ if has_values:
pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
else:
pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
More information about the Python-checkins
mailing list