[Python-checkins] cpython (3.5): Issue #28969: Fixed race condition in C implementation of functools.lru_cache.

serhiy.storchaka python-checkins at python.org
Thu Jan 12 12:47:09 EST 2017


https://hg.python.org/cpython/rev/9314aebc9674
changeset:   106109:9314aebc9674
branch:      3.5
parent:      106102:acf2e51ef746
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Thu Jan 12 18:34:33 2017 +0200
summary:
  Issue #28969: Fixed race condition in C implementation of functools.lru_cache.
KeyError could be raised when cached function with full cache was
simultaneously called from differen threads with the same uncached arguments.

files:
  Include/dictobject.h       |   1 +
  Lib/test/test_functools.py |  15 ++++++
  Misc/NEWS                  |   4 +
  Modules/_functoolsmodule.c |  56 ++++++++++++++++---------
  Objects/dictobject.c       |  31 ++++++++++---
  5 files changed, 78 insertions(+), 29 deletions(-)


diff --git a/Include/dictobject.h b/Include/dictobject.h
--- a/Include/dictobject.h
+++ b/Include/dictobject.h
@@ -102,6 +102,7 @@
 Py_ssize_t _PyDict_KeysSize(PyDictKeysObject *keys);
 Py_ssize_t _PyDict_SizeOf(PyDictObject *);
 PyObject *_PyDict_Pop(PyDictObject *, PyObject *, PyObject *);
+PyObject *_PyDict_Pop_KnownHash(PyDictObject *, PyObject *, Py_hash_t, PyObject *);
 PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *);
 #define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
 
diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py
--- a/Lib/test/test_functools.py
+++ b/Lib/test/test_functools.py
@@ -7,6 +7,7 @@
 from random import choice
 import sys
 from test import support
+import time
 import unittest
 from weakref import proxy
 try:
@@ -1364,6 +1365,20 @@
                 pause.reset()
                 self.assertEqual(f.cache_info(), (0, (i+1)*n, m*n, i+1))
 
+    @unittest.skipUnless(threading, 'This test requires threading.')
+    def test_lru_cache_threaded3(self):
+        @self.module.lru_cache(maxsize=2)
+        def f(x):
+            time.sleep(.01)
+            return 3 * x
+        def test(i, x):
+            with self.subTest(thread=i):
+                self.assertEqual(f(x), 3 * x, i)
+        threads = [threading.Thread(target=test, args=(i, v))
+                   for i, v in enumerate([1, 2, 2, 3, 2])]
+        with support.start_threads(threads):
+            pass
+
     def test_need_for_rlock(self):
         # This will deadlock on an LRU cache that uses a regular lock
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@
 Library
 -------
 
+- Issue #28969: Fixed race condition in C implementation of functools.lru_cache.
+  KeyError could be raised when cached function with full cache was
+  simultaneously called from differen threads with the same uncached arguments.
+
 - Issue #29142: In urllib.request, suffixes in no_proxy environment variable with
   leading dots could match related hostnames again (e.g. .b.c matches a.b.c).
   Patch by Milan Oberkirch.
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -864,42 +864,56 @@
     }
     if (self->full && self->root.next != &self->root) {
         /* Use the oldest item to store the new key and result. */
-        PyObject *oldkey, *oldresult;
+        PyObject *oldkey, *oldresult, *popresult;
         /* Extricate the oldest item. */
         link = self->root.next;
         lru_cache_extricate_link(link);
         /* Remove it from the cache.
            The cache dict holds one reference to the link,
            and the linked list holds yet one reference to it. */
-        if (_PyDict_DelItem_KnownHash(self->cache, link->key,
-                                      link->hash) < 0) {
+        popresult = _PyDict_Pop_KnownHash((PyDictObject *)self->cache,
+                                          link->key, link->hash,
+                                          Py_None);
+        if (popresult == Py_None) {
+            /* Getting here means that this same key was added to the
+               cache while the lock was released.  Since the link
+               update is already done, we need only return the
+               computed result and update the count of misses. */
+            Py_DECREF(popresult);
+            Py_DECREF(link);
+            Py_DECREF(key);
+        }
+        else if (popresult == NULL) {
             lru_cache_append_link(self, link);
             Py_DECREF(key);
             Py_DECREF(result);
             return NULL;
         }
-        /* Keep a reference to the old key and old result to
-           prevent their ref counts from going to zero during the
-           update. That will prevent potentially arbitrary object
-           clean-up code (i.e. __del__) from running while we're
-           still adjusting the links. */
-        oldkey = link->key;
-        oldresult = link->result;
+        else {
+            Py_DECREF(popresult);
+            /* Keep a reference to the old key and old result to
+               prevent their ref counts from going to zero during the
+               update. That will prevent potentially arbitrary object
+               clean-up code (i.e. __del__) from running while we're
+               still adjusting the links. */
+            oldkey = link->key;
+            oldresult = link->result;
 
-        link->hash = hash;
-        link->key = key;
-        link->result = result;
-        if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                      hash) < 0) {
-            Py_DECREF(link);
+            link->hash = hash;
+            link->key = key;
+            link->result = result;
+            if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
+                                          hash) < 0) {
+                Py_DECREF(link);
+                Py_DECREF(oldkey);
+                Py_DECREF(oldresult);
+                return NULL;
+            }
+            lru_cache_append_link(self, link);
+            Py_INCREF(result); /* for return */
             Py_DECREF(oldkey);
             Py_DECREF(oldresult);
-            return NULL;
         }
-        lru_cache_append_link(self, link);
-        Py_INCREF(result); /* for return */
-        Py_DECREF(oldkey);
-        Py_DECREF(oldresult);
     } else {
         /* Put result in a new link at the front of the queue. */
         link = (lru_list_elem *)PyObject_GC_New(lru_list_elem,
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1475,9 +1475,8 @@
 
 /* Internal version of dict.pop(). */
 PyObject *
-_PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
+_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *deflt)
 {
-    Py_hash_t hash;
     PyObject *old_value, *old_key;
     PyDictKeyEntry *ep;
     PyObject **value_addr;
@@ -1490,12 +1489,6 @@
         _PyErr_SetKeyError(key);
         return NULL;
     }
-    if (!PyUnicode_CheckExact(key) ||
-        (hash = ((PyASCIIObject *) key)->hash) == -1) {
-        hash = PyObject_Hash(key);
-        if (hash == -1)
-            return NULL;
-    }
     ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
@@ -1520,6 +1513,28 @@
     return old_value;
 }
 
+PyObject *
+_PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
+{
+    Py_hash_t hash;
+
+    if (mp->ma_used == 0) {
+        if (deflt) {
+            Py_INCREF(deflt);
+            return deflt;
+        }
+        _PyErr_SetKeyError(key);
+        return NULL;
+    }
+    if (!PyUnicode_CheckExact(key) ||
+        (hash = ((PyASCIIObject *) key)->hash) == -1) {
+        hash = PyObject_Hash(key);
+        if (hash == -1)
+            return NULL;
+    }
+    return _PyDict_Pop_KnownHash(mp, key, hash, deflt);
+}
+
 /* Internal version of dict.from_keys().  It is subclass-friendly. */
 PyObject *
 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list