[Python-checkins] cpython (merge 3.6 -> default): Issue #28583: PyDict_SetDefault didn't combine split table when needed.

inada.naoki python-checkins at python.org
Wed Nov 2 05:47:45 EDT 2016


https://hg.python.org/cpython/rev/a6a79053aec4
changeset:   104870:a6a79053aec4
parent:      104868:b671422240cd
parent:      104869:4e9c7704f373
user:        INADA Naoki <songofacandy at gmail.com>
date:        Wed Nov 02 18:47:24 2016 +0900
summary:
  Issue #28583: PyDict_SetDefault() did not convert a split-table dict to a
  combined table when inserting the key would break the insertion order of
  the shared keys.

Patch by Xiang Zhang.

files:
  Lib/test/test_dict.py |  17 ++++++++
  Misc/NEWS             |   3 +
  Objects/dictobject.c  |  60 +++++++++++++++++++++++-------
  3 files changed, 65 insertions(+), 15 deletions(-)


diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -852,6 +852,23 @@
         return dicts
 
     @support.cpython_only
+    def test_splittable_setdefault(self):
+        """split table must be combined when setdefault()
+        breaks insertion order"""
+        a, b = self.make_shared_key_dict(2)
+
+        a['a'] = 1
+        size_a = sys.getsizeof(a)
+        a['b'] = 2
+        b.setdefault('b', 2)
+        size_b = sys.getsizeof(b)
+        b['a'] = 1
+
+        self.assertGreater(size_b, size_a)
+        self.assertEqual(list(a), ['x', 'y', 'z', 'a', 'b'])
+        self.assertEqual(list(b), ['x', 'y', 'z', 'b', 'a'])
+
+    @support.cpython_only
     def test_splittable_del(self):
         """split table must be combined when del d[k]"""
         a, b = self.make_shared_key_dict(2)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #28583: PyDict_SetDefault didn't combine split table when needed.
+  Patch by Xiang Zhang.
+
 - Issue #28128: Deprecation warning for invalid str and byte escape
   sequences now prints better information about where the error
   occurs. Patch by Serhiy Storchaka and Eric Smith.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -2758,58 +2758,88 @@
 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
 {
     PyDictObject *mp = (PyDictObject *)d;
-    PyObject *val = NULL;
+    PyObject *value;
     Py_hash_t hash;
     Py_ssize_t hashpos, ix;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyDict_Check(d)) {
         PyErr_BadInternalCall();
         return NULL;
     }
+
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
+
+    if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
+        if (insertion_resize(mp) < 0)
+            return NULL;
+    }
+
     ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
     if (ix == DKIX_ERROR)
         return NULL;
-    if (ix == DKIX_EMPTY || *value_addr == NULL) {
-        val = defaultobj;
+
+    if (_PyDict_HasSplitTable(mp) &&
+        ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
+         (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
+        if (insertion_resize(mp) < 0) {
+            return NULL;
+        }
+        find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+        ix = DKIX_EMPTY;
+    }
+
+    if (ix == DKIX_EMPTY) {
+        PyDictKeyEntry *ep, *ep0;
+        value = defaultobj;
         if (mp->ma_keys->dk_usable <= 0) {
-            /* Need to resize. */
             if (insertion_resize(mp) < 0) {
                 return NULL;
             }
             find_empty_slot(mp, key, hash, &value_addr, &hashpos);
         }
-        ix = mp->ma_keys->dk_nentries;
-        Py_INCREF(defaultobj);
+        ep0 = DK_ENTRIES(mp->ma_keys);
+        ep = &ep0[mp->ma_keys->dk_nentries];
+        dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
         Py_INCREF(key);
-        MAINTAIN_TRACKING(mp, key, defaultobj);
-        dk_set_index(mp->ma_keys, hashpos, ix);
-        ep = &DK_ENTRIES(mp->ma_keys)[ix];
+        Py_INCREF(value);
+        MAINTAIN_TRACKING(mp, key, value);
         ep->me_key = key;
         ep->me_hash = hash;
         if (mp->ma_values) {
-            mp->ma_values[ix] = val;
+            assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
+            mp->ma_values[mp->ma_keys->dk_nentries] = value;
         }
         else {
-            ep->me_value = val;
+            ep->me_value = value;
         }
+        mp->ma_used++;
+        mp->ma_version_tag = DICT_NEXT_VERSION();
         mp->ma_keys->dk_usable--;
         mp->ma_keys->dk_nentries++;
+        assert(mp->ma_keys->dk_usable >= 0);
+    }
+    else if (*value_addr == NULL) {
+        value = defaultobj;
+        assert(_PyDict_HasSplitTable(mp));
+        assert(ix == mp->ma_used);
+        Py_INCREF(value);
+        MAINTAIN_TRACKING(mp, key, value);
+        *value_addr = value;
         mp->ma_used++;
         mp->ma_version_tag = DICT_NEXT_VERSION();
-        assert(_PyDict_CheckConsistency(mp));
     }
     else {
-        val = *value_addr;
+        value = *value_addr;
     }
-    return val;
+
+    assert(_PyDict_CheckConsistency(mp));
+    return value;
 }
 
 static PyObject *

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list