[Python-checkins] cpython (merge 3.5 -> 3.6): Issue #28427: old keys should not remove new values from

antoine.pitrou python-checkins at python.org
Tue Dec 27 08:37:23 EST 2016


https://hg.python.org/cpython/rev/97d6616b2d22
changeset:   105852:97d6616b2d22
branch:      3.6
parent:      105848:f26c16aba11e
parent:      105851:b8b0718d424f
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Tue Dec 27 14:23:43 2016 +0100
summary:
  Issue #28427: old keys should not remove new values from
WeakValueDictionary when collecting from another thread.

files:
  Include/dictobject.h        |   2 +
  Lib/test/test_weakref.py    |  12 +++
  Lib/weakref.py              |  34 +++++++-
  Misc/NEWS                   |   3 +
  Modules/_weakref.c          |  41 ++++++++++
  Modules/clinic/_weakref.c.h |  32 ++++++++-
  Objects/dictobject.c        |  95 ++++++++++++++++++++-----
  7 files changed, 195 insertions(+), 24 deletions(-)


diff --git a/Include/dictobject.h b/Include/dictobject.h
--- a/Include/dictobject.h
+++ b/Include/dictobject.h
@@ -88,6 +88,8 @@
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyDict_DelItem_KnownHash(PyObject *mp, PyObject *key,
                                           Py_hash_t hash);
+PyAPI_FUNC(int) _PyDict_DelItemIf(PyObject *mp, PyObject *key,
+                                  int (*predicate)(PyObject *value));
 #endif
 PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
 PyAPI_FUNC(int) PyDict_Next(
diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py
--- a/Lib/test/test_weakref.py
+++ b/Lib/test/test_weakref.py
@@ -1676,6 +1676,18 @@
                 x = d.pop(10, 10)
                 self.assertIsNot(x, None)  # we never put None in there!
 
+    def test_threaded_weak_valued_consistency(self):
+        # Issue #28427: old keys should not remove new values from
+        # WeakValueDictionary when collecting from another thread.
+        d = weakref.WeakValueDictionary()
+        with collect_in_thread():
+            for i in range(200000):
+                o = RefCycle()
+                d[10] = o
+                # o is still alive, so the dict can't be empty
+                self.assertEqual(len(d), 1)
+                o = None  # lose ref
+
 
 from test import mapping_tests
 
diff --git a/Lib/weakref.py b/Lib/weakref.py
--- a/Lib/weakref.py
+++ b/Lib/weakref.py
@@ -16,7 +16,8 @@
      proxy,
      CallableProxyType,
      ProxyType,
-     ReferenceType)
+     ReferenceType,
+     _remove_dead_weakref)
 
 from _weakrefset import WeakSet, _IterationGuard
 
@@ -111,7 +112,9 @@
                 if self._iterating:
                     self._pending_removals.append(wr.key)
                 else:
-                    del self.data[wr.key]
+                    # Atomic removal is necessary since this function
+                    # can be called asynchronously by the GC
+                    _remove_dead_weakref(d, wr.key)
         self._remove = remove
         # A list of keys to be removed
         self._pending_removals = []
@@ -125,9 +128,12 @@
         # We shouldn't encounter any KeyError, because this method should
         # always be called *before* mutating the dict.
         while l:
-            del d[l.pop()]
+            key = l.pop()
+            _remove_dead_weakref(d, key)
 
     def __getitem__(self, key):
+        if self._pending_removals:
+            self._commit_removals()
         o = self.data[key]()
         if o is None:
             raise KeyError(key)
@@ -140,9 +146,13 @@
         del self.data[key]
 
     def __len__(self):
-        return len(self.data) - len(self._pending_removals)
+        if self._pending_removals:
+            self._commit_removals()
+        return len(self.data)
 
     def __contains__(self, key):
+        if self._pending_removals:
+            self._commit_removals()
         try:
             o = self.data[key]()
         except KeyError:
@@ -158,6 +168,8 @@
         self.data[key] = KeyedRef(value, self._remove, key)
 
     def copy(self):
+        if self._pending_removals:
+            self._commit_removals()
         new = WeakValueDictionary()
         for key, wr in self.data.items():
             o = wr()
@@ -169,6 +181,8 @@
 
     def __deepcopy__(self, memo):
         from copy import deepcopy
+        if self._pending_removals:
+            self._commit_removals()
         new = self.__class__()
         for key, wr in self.data.items():
             o = wr()
@@ -177,6 +191,8 @@
         return new
 
     def get(self, key, default=None):
+        if self._pending_removals:
+            self._commit_removals()
         try:
             wr = self.data[key]
         except KeyError:
@@ -190,6 +206,8 @@
                 return o
 
     def items(self):
+        if self._pending_removals:
+            self._commit_removals()
         with _IterationGuard(self):
             for k, wr in self.data.items():
                 v = wr()
@@ -197,6 +215,8 @@
                     yield k, v
 
     def keys(self):
+        if self._pending_removals:
+            self._commit_removals()
         with _IterationGuard(self):
             for k, wr in self.data.items():
                 if wr() is not None:
@@ -214,10 +234,14 @@
         keep the values around longer than needed.
 
         """
+        if self._pending_removals:
+            self._commit_removals()
         with _IterationGuard(self):
             yield from self.data.values()
 
     def values(self):
+        if self._pending_removals:
+            self._commit_removals()
         with _IterationGuard(self):
             for wr in self.data.values():
                 obj = wr()
@@ -290,6 +314,8 @@
         keep the values around longer than needed.
 
         """
+        if self._pending_removals:
+            self._commit_removals()
         return list(self.data.values())
 
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,9 @@
 Library
 -------
 
+- Issue #28427: old keys should not remove new values from
+  WeakValueDictionary when collecting from another thread.
+
 - Issue 28923: Remove editor artifacts from Tix.py.
 
 - Issue #29055:  Neaten-up empty population error on random.choice()
diff --git a/Modules/_weakref.c b/Modules/_weakref.c
--- a/Modules/_weakref.c
+++ b/Modules/_weakref.c
@@ -35,6 +35,46 @@
 }
 
 
+static int
+is_dead_weakref(PyObject *value)
+{
+    if (!PyWeakref_Check(value)) {
+        PyErr_SetString(PyExc_TypeError, "not a weakref");
+        return -1;
+    }
+    return PyWeakref_GET_OBJECT(value) == Py_None;
+}
+
+/*[clinic input]
+
+_weakref._remove_dead_weakref -> object
+
+  dct: object(subclass_of='&PyDict_Type')
+  key: object
+  /
+
+Atomically remove key from dict if it points to a dead weakref.
+[clinic start generated code]*/
+
+static PyObject *
+_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct,
+                                   PyObject *key)
+/*[clinic end generated code: output=d9ff53061fcb875c input=19fc91f257f96a1d]*/
+{
+    if (_PyDict_DelItemIf(dct, key, is_dead_weakref) < 0) {
+        if (PyErr_ExceptionMatches(PyExc_KeyError))
+            /* This function is meant to allow safe weak-value dicts
+               with GC in another thread (see issue #28427), so it's
+               ok if the key doesn't exist anymore.
+               */
+            PyErr_Clear();
+        else
+            return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+
 PyDoc_STRVAR(weakref_getweakrefs__doc__,
 "getweakrefs(object) -- return a list of all weak reference objects\n"
 "that point to 'object'.");
@@ -88,6 +128,7 @@
 static PyMethodDef
 weakref_functions[] =  {
     _WEAKREF_GETWEAKREFCOUNT_METHODDEF
+    _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF
     {"getweakrefs",     weakref_getweakrefs,            METH_O,
      weakref_getweakrefs__doc__},
     {"proxy",           weakref_proxy,                  METH_VARARGS,
diff --git a/Modules/clinic/_weakref.c.h b/Modules/clinic/_weakref.c.h
--- a/Modules/clinic/_weakref.c.h
+++ b/Modules/clinic/_weakref.c.h
@@ -29,4 +29,34 @@
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=e1ad587147323e19 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(_weakref__remove_dead_weakref__doc__,
+"_remove_dead_weakref($module, dct, key, /)\n"
+"--\n"
+"\n"
+"Atomically remove key from dict if it points to a dead weakref.");
+
+#define _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF    \
+    {"_remove_dead_weakref", (PyCFunction)_weakref__remove_dead_weakref, METH_VARARGS, _weakref__remove_dead_weakref__doc__},
+
+static PyObject *
+_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct,
+                                   PyObject *key);
+
+static PyObject *
+_weakref__remove_dead_weakref(PyObject *module, PyObject *args)
+{
+    PyObject *return_value = NULL;
+    PyObject *dct;
+    PyObject *key;
+
+    if (!PyArg_ParseTuple(args, "O!O:_remove_dead_weakref",
+        &PyDict_Type, &dct, &key)) {
+        goto exit;
+    }
+    return_value = _weakref__remove_dead_weakref_impl(module, dct, key);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=e860dd818a44bc9b input=a9049054013a1b77]*/
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1591,11 +1591,34 @@
     return insertdict(mp, key, hash, value);
 }
 
+static int
+delitem_common(PyDictObject *mp, Py_ssize_t hashpos, Py_ssize_t ix,
+               PyObject **value_addr)
+{
+    PyObject *old_key, *old_value;
+    PyDictKeyEntry *ep;
+
+    old_value = *value_addr;
+    assert(old_value != NULL);
+    *value_addr = NULL;
+    mp->ma_used--;
+    mp->ma_version_tag = DICT_NEXT_VERSION();
+    ep = &DK_ENTRIES(mp->ma_keys)[ix];
+    dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+    ENSURE_ALLOWS_DELETIONS(mp);
+    old_key = ep->me_key;
+    ep->me_key = NULL;
+    Py_DECREF(old_key);
+    Py_DECREF(old_value);
+
+    assert(_PyDict_CheckConsistency(mp));
+    return 0;
+}
+
 int
 PyDict_DelItem(PyObject *op, PyObject *key)
 {
     Py_hash_t hash;
-
     assert(key);
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -1612,8 +1635,6 @@
 {
     Py_ssize_t hashpos, ix;
     PyDictObject *mp;
-    PyDictKeyEntry *ep;
-    PyObject *old_key, *old_value;
     PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
@@ -1640,24 +1661,60 @@
         ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
         assert(ix >= 0);
     }
-
-    old_value = *value_addr;
-    assert(old_value != NULL);
-    *value_addr = NULL;
-    mp->ma_used--;
-    mp->ma_version_tag = DICT_NEXT_VERSION();
-    ep = &DK_ENTRIES(mp->ma_keys)[ix];
-    dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
-    ENSURE_ALLOWS_DELETIONS(mp);
-    old_key = ep->me_key;
-    ep->me_key = NULL;
-    Py_DECREF(old_key);
-    Py_DECREF(old_value);
-
-    assert(_PyDict_CheckConsistency(mp));
-    return 0;
+    return delitem_common(mp, hashpos, ix, value_addr);
 }
 
+/* This function promises that the predicate -> deletion sequence is atomic
+ * (i.e. protected by the GIL), assuming the predicate itself doesn't
+ * release the GIL.
+ */
+int
+_PyDict_DelItemIf(PyObject *op, PyObject *key,
+                  int (*predicate)(PyObject *value))
+{
+    Py_ssize_t hashpos, ix;
+    PyDictObject *mp;
+    Py_hash_t hash;
+    PyObject **value_addr;
+    int res;
+
+    if (!PyDict_Check(op)) {
+        PyErr_BadInternalCall();
+        return -1;
+    }
+    assert(key);
+    hash = PyObject_Hash(key);
+    if (hash == -1)
+        return -1;
+    mp = (PyDictObject *)op;
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
+        return -1;
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
+        _PyErr_SetKeyError(key);
+        return -1;
+    }
+    assert(dk_get_index(mp->ma_keys, hashpos) == ix);
+
+    // Split table doesn't allow deletion.  Combine it.
+    if (_PyDict_HasSplitTable(mp)) {
+        if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+            return -1;
+        }
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+        assert(ix >= 0);
+    }
+
+    res = predicate(*value_addr);
+    if (res == -1)
+        return -1;
+    if (res > 0)
+        return delitem_common(mp, hashpos, ix, value_addr);
+    else
+        return 0;
+}
+
+
 void
 PyDict_Clear(PyObject *op)
 {

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list