[Python-checkins] cpython: Issue #9260: A finer-grained import lock.
antoine.pitrou
python-checkins at python.org
Thu May 17 19:03:04 CEST 2012
http://hg.python.org/cpython/rev/edb9ce3a6c2e
changeset: 77013:edb9ce3a6c2e
user: Antoine Pitrou <solipsis at pitrou.net>
date: Thu May 17 18:55:59 2012 +0200
summary:
Issue #9260: A finer-grained import lock.
Most of the import sequence now uses per-module locks rather than the
global import lock, eliminating well-known issues with threads and imports.
files:
Doc/c-api/import.rst | 14 +-
Doc/library/imp.rst | 31 ++-
Lib/importlib/_bootstrap.py | 191 +++++++++++++++++-
Lib/importlib/test/test_locks.py | 115 +++++++++++
Lib/pydoc.py | 2 +-
Lib/test/lock_tests.py | 12 +-
Lib/test/test_pkg.py | 2 +
Lib/test/test_threaded_import.py | 19 +-
Lib/token.py | 2 +-
Misc/NEWS | 4 +
Python/import.c | 88 ++++----
Python/importlib.h | Bin
12 files changed, 398 insertions(+), 82 deletions(-)
diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst
--- a/Doc/c-api/import.rst
+++ b/Doc/c-api/import.rst
@@ -30,13 +30,13 @@
.. c:function:: PyObject* PyImport_ImportModuleNoBlock(const char *name)
- This version of :c:func:`PyImport_ImportModule` does not block. It's intended
- to be used in C functions that import other modules to execute a function.
- The import may block if another thread holds the import lock. The function
- :c:func:`PyImport_ImportModuleNoBlock` never blocks. It first tries to fetch
- the module from sys.modules and falls back to :c:func:`PyImport_ImportModule`
- unless the lock is held, in which case the function will raise an
- :exc:`ImportError`.
+ This function is a deprecated alias of :c:func:`PyImport_ImportModule`.
+
+ .. versionchanged:: 3.3
+ This function used to fail immediately when the import lock was held
+ by another thread. In Python 3.3 though, the locking scheme switched
+ to per-module locks for most purposes, so this function's special
+ behaviour isn't needed anymore.
.. c:function:: PyObject* PyImport_ImportModuleEx(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist)
diff --git a/Doc/library/imp.rst b/Doc/library/imp.rst
--- a/Doc/library/imp.rst
+++ b/Doc/library/imp.rst
@@ -112,18 +112,29 @@
Return ``True`` if the import lock is currently held, else ``False``. On
platforms without threads, always return ``False``.
- On platforms with threads, a thread executing an import holds an internal lock
- until the import is complete. This lock blocks other threads from doing an
- import until the original import completes, which in turn prevents other threads
- from seeing incomplete module objects constructed by the original thread while
- in the process of completing its import (and the imports, if any, triggered by
- that).
+ On platforms with threads, a thread executing an import first holds a
+ global import lock, then sets up a per-module lock for the rest of the
+ import. This blocks other threads from importing the same module until
+ the original import completes, preventing other threads from seeing
+ incomplete module objects constructed by the original thread. An
+ exception is made for circular imports, which by construction have to
+ expose an incomplete module object at some point.
+
+ .. note::
+ Locking semantics of imports are an implementation detail which may
+ vary from release to release. However, Python ensures that circular
+ imports work without any deadlocks.
+
+ .. versionchanged:: 3.3
+ In Python 3.3, the locking scheme has changed to per-module locks for
+ the most part.
.. function:: acquire_lock()
- Acquire the interpreter's import lock for the current thread. This lock should
- be used by import hooks to ensure thread-safety when importing modules.
+ Acquire the interpreter's global import lock for the current thread.
+ This lock should be used by import hooks to ensure thread-safety when
+ importing modules.
Once a thread has acquired the import lock, the same thread may acquire it
again without blocking; the thread must release it once for each time it has
@@ -134,8 +145,8 @@
.. function:: release_lock()
- Release the interpreter's import lock. On platforms without threads, this
- function does nothing.
+ Release the interpreter's global import lock. On platforms without
+ threads, this function does nothing.
.. function:: reload(module)
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -159,6 +159,145 @@
return type(_io)(name)
+# Module-level locking ########################################################
+
+# A dict mapping module names to weakrefs of _ModuleLock instances
+_module_locks = {}
+# A dict mapping thread ids to _ModuleLock instances
+_blocking_on = {}
+
+
+class _DeadlockError(RuntimeError):
+ pass
+
+
+class _ModuleLock:
+ """A recursive lock implementation which is able to detect deadlocks
+ (e.g. thread 1 trying to take locks A then B, and thread 2 trying to
+ take locks B then A).
+ """
+
+ def __init__(self, name):
+ self.lock = _thread.allocate_lock()
+ self.wakeup = _thread.allocate_lock()
+ self.name = name
+ self.owner = None
+ self.count = 0
+ self.waiters = 0
+
+ def has_deadlock(self):
+ # Deadlock avoidance for concurrent circular imports.
+ me = _thread.get_ident()
+ tid = self.owner
+ while True:
+ lock = _blocking_on.get(tid)
+ if lock is None:
+ return False
+ tid = lock.owner
+ if tid == me:
+ return True
+
+ def acquire(self):
+ """
+ Acquire the module lock. If a potential deadlock is detected,
+ a _DeadlockError is raised.
+ Otherwise, the lock is always acquired and True is returned.
+ """
+ tid = _thread.get_ident()
+ _blocking_on[tid] = self
+ try:
+ while True:
+ with self.lock:
+ if self.count == 0 or self.owner == tid:
+ self.owner = tid
+ self.count += 1
+ return True
+ if self.has_deadlock():
+ raise _DeadlockError("deadlock detected by %r" % self)
+ if self.wakeup.acquire(False):
+ self.waiters += 1
+ # Wait for a release() call
+ self.wakeup.acquire()
+ self.wakeup.release()
+ finally:
+ del _blocking_on[tid]
+
+ def release(self):
+ tid = _thread.get_ident()
+ with self.lock:
+ if self.owner != tid:
+ raise RuntimeError("cannot release un-acquired lock")
+ assert self.count > 0
+ self.count -= 1
+ if self.count == 0:
+ self.owner = None
+ if self.waiters:
+ self.waiters -= 1
+ self.wakeup.release()
+
+ def __repr__(self):
+ return "_ModuleLock(%r) at %d" % (self.name, id(self))
+
+
+class _DummyModuleLock:
+ """A simple _ModuleLock equivalent for Python builds without
+ multi-threading support."""
+
+ def __init__(self, name):
+ self.name = name
+ self.count = 0
+
+ def acquire(self):
+ self.count += 1
+ return True
+
+ def release(self):
+ if self.count == 0:
+ raise RuntimeError("cannot release un-acquired lock")
+ self.count -= 1
+
+ def __repr__(self):
+ return "_DummyModuleLock(%r) at %d" % (self.name, id(self))
+
+
+# The following two functions are for consumption by Python/import.c.
+
+def _get_module_lock(name):
+ """Get or create the module lock for a given module name.
+
+ Should only be called with the import lock taken."""
+ lock = None
+ if name in _module_locks:
+ lock = _module_locks[name]()
+ if lock is None:
+ if _thread is None:
+ lock = _DummyModuleLock(name)
+ else:
+ lock = _ModuleLock(name)
+ def cb(_):
+ del _module_locks[name]
+ _module_locks[name] = _weakref.ref(lock, cb)
+ return lock
+
+def _lock_unlock_module(name):
+ """Release the global import lock, and acquires then release the
+ module lock for a given module name.
+ This is used to ensure a module is completely initialized, in the
+ event it is being imported by another thread.
+
+ Should only be called with the import lock taken."""
+ lock = _get_module_lock(name)
+ _imp.release_lock()
+ try:
+ lock.acquire()
+ except _DeadlockError:
+ # Concurrent circular import, we'll accept a partially initialized
+ # module object.
+ pass
+ else:
+ lock.release()
+
+
# Finder/loader utility code ##################################################
_PYCACHE = '__pycache__'
@@ -264,12 +403,15 @@
else:
module.__package__ = fullname.rpartition('.')[0]
try:
+ module.__initializing__ = True
# If __package__ was not set above, __import__() will do it later.
return fxn(self, module, *args, **kwargs)
except:
if not is_reload:
del sys.modules[fullname]
raise
+ finally:
+ module.__initializing__ = False
_wrap(module_for_loader_wrapper, fxn)
return module_for_loader_wrapper
@@ -932,7 +1074,8 @@
if not sys.meta_path:
_warnings.warn('sys.meta_path is empty', ImportWarning)
for finder in sys.meta_path:
- loader = finder.find_module(name, path)
+ with _ImportLockContext():
+ loader = finder.find_module(name, path)
if loader is not None:
# The parent import may have already imported this module.
if name not in sys.modules:
@@ -962,8 +1105,7 @@
_ERR_MSG = 'No module named {!r}'
-def _find_and_load(name, import_):
- """Find and load the module."""
+def _find_and_load_unlocked(name, import_):
path = None
parent = name.rpartition('.')[0]
if parent:
@@ -1009,6 +1151,19 @@
return module
+def _find_and_load(name, import_):
+ """Find and load the module, and release the import lock."""
+ try:
+ lock = _get_module_lock(name)
+ finally:
+ _imp.release_lock()
+ lock.acquire()
+ try:
+ return _find_and_load_unlocked(name, import_)
+ finally:
+ lock.release()
+
+
def _gcd_import(name, package=None, level=0):
"""Import and return the module based on its name, the package the call is
being made from, and the level adjustment.
@@ -1021,17 +1176,17 @@
_sanity_check(name, package, level)
if level > 0:
name = _resolve_name(name, package, level)
- with _ImportLockContext():
- try:
- module = sys.modules[name]
- if module is None:
- message = ("import of {} halted; "
- "None in sys.modules".format(name))
- raise ImportError(message, name=name)
- return module
- except KeyError:
- pass # Don't want to chain the exception
+ _imp.acquire_lock()
+ if name not in sys.modules:
return _find_and_load(name, _gcd_import)
+ module = sys.modules[name]
+ if module is None:
+ _imp.release_lock()
+ message = ("import of {} halted; "
+ "None in sys.modules".format(name))
+ raise ImportError(message, name=name)
+ _lock_unlock_module(name)
+ return module
def _handle_fromlist(module, fromlist, import_):
@@ -1149,7 +1304,17 @@
continue
else:
raise ImportError('importlib requires posix or nt')
+
+ try:
+ thread_module = BuiltinImporter.load_module('_thread')
+ except ImportError:
+ # Python was built without threads
+ thread_module = None
+ weakref_module = BuiltinImporter.load_module('_weakref')
+
setattr(self_module, '_os', os_module)
+ setattr(self_module, '_thread', thread_module)
+ setattr(self_module, '_weakref', weakref_module)
setattr(self_module, 'path_sep', path_sep)
setattr(self_module, 'path_separators', set(path_separators))
# Constants
diff --git a/Lib/importlib/test/test_locks.py b/Lib/importlib/test/test_locks.py
new file mode 100644
--- /dev/null
+++ b/Lib/importlib/test/test_locks.py
@@ -0,0 +1,115 @@
+from importlib import _bootstrap
+import time
+import unittest
+import weakref
+
+from test import support
+
+try:
+ import threading
+except ImportError:
+ threading = None
+else:
+ from test import lock_tests
+
+
+LockType = _bootstrap._ModuleLock
+DeadlockError = _bootstrap._DeadlockError
+
+
+if threading is not None:
+ class ModuleLockAsRLockTests(lock_tests.RLockTests):
+ locktype = staticmethod(lambda: LockType("some_lock"))
+
+ # _is_owned() unsupported
+ test__is_owned = None
+ # acquire(blocking=False) unsupported
+ test_try_acquire = None
+ test_try_acquire_contended = None
+ # `with` unsupported
+ test_with = None
+ # acquire(timeout=...) unsupported
+ test_timeout = None
+ # _release_save() unsupported
+ test_release_save_unacquired = None
+
+else:
+ class ModuleLockAsRLockTests(unittest.TestCase):
+ pass
+
+
+ at unittest.skipUnless(threading, "threads needed for this test")
+class DeadlockAvoidanceTests(unittest.TestCase):
+
+ def run_deadlock_avoidance_test(self, create_deadlock):
+ NLOCKS = 10
+ locks = [LockType(str(i)) for i in range(NLOCKS)]
+ pairs = [(locks[i], locks[(i+1)%NLOCKS]) for i in range(NLOCKS)]
+ if create_deadlock:
+ NTHREADS = NLOCKS
+ else:
+ NTHREADS = NLOCKS - 1
+ barrier = threading.Barrier(NTHREADS)
+ results = []
+ def _acquire(lock):
+ """Try to acquire the lock. Return True on success, False on deadlock."""
+ try:
+ lock.acquire()
+ except DeadlockError:
+ return False
+ else:
+ return True
+ def f():
+ a, b = pairs.pop()
+ ra = _acquire(a)
+ barrier.wait()
+ rb = _acquire(b)
+ results.append((ra, rb))
+ if rb:
+ b.release()
+ if ra:
+ a.release()
+ lock_tests.Bunch(f, NTHREADS).wait_for_finished()
+ self.assertEqual(len(results), NTHREADS)
+ return results
+
+ def test_deadlock(self):
+ results = self.run_deadlock_avoidance_test(True)
+ # One of the threads detected a potential deadlock on its second
+ # acquire() call.
+ self.assertEqual(results.count((True, False)), 1)
+ self.assertEqual(results.count((True, True)), len(results) - 1)
+
+ def test_no_deadlock(self):
+ results = self.run_deadlock_avoidance_test(False)
+ self.assertEqual(results.count((True, False)), 0)
+ self.assertEqual(results.count((True, True)), len(results))
+
+
+class LifetimeTests(unittest.TestCase):
+
+ def test_lock_lifetime(self):
+ name = "xyzzy"
+ self.assertNotIn(name, _bootstrap._module_locks)
+ lock = _bootstrap._get_module_lock(name)
+ self.assertIn(name, _bootstrap._module_locks)
+ wr = weakref.ref(lock)
+ del lock
+ support.gc_collect()
+ self.assertNotIn(name, _bootstrap._module_locks)
+ self.assertIs(wr(), None)
+
+ def test_all_locks(self):
+ support.gc_collect()
+ self.assertEqual(0, len(_bootstrap._module_locks))
+
+
+ at support.reap_threads
+def test_main():
+ support.run_unittest(ModuleLockAsRLockTests,
+ DeadlockAvoidanceTests,
+ LifetimeTests)
+
+
+if __name__ == '__main__':
+ test_main()
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -167,7 +167,7 @@
if name in {'__builtins__', '__doc__', '__file__', '__path__',
'__module__', '__name__', '__slots__', '__package__',
'__cached__', '__author__', '__credits__', '__date__',
- '__version__', '__qualname__'}:
+ '__version__', '__qualname__', '__initializing__'}:
return 0
# Private names are hidden, but special names are displayed.
if name.startswith('__') and name.endswith('__'): return 1
diff --git a/Lib/test/lock_tests.py b/Lib/test/lock_tests.py
--- a/Lib/test/lock_tests.py
+++ b/Lib/test/lock_tests.py
@@ -247,7 +247,6 @@
# Cannot release an unacquired lock
lock = self.locktype()
self.assertRaises(RuntimeError, lock.release)
- self.assertRaises(RuntimeError, lock._release_save)
lock.acquire()
lock.acquire()
lock.release()
@@ -255,6 +254,17 @@
lock.release()
lock.release()
self.assertRaises(RuntimeError, lock.release)
+
+ def test_release_save_unacquired(self):
+ # Cannot _release_save an unacquired lock
+ lock = self.locktype()
+ self.assertRaises(RuntimeError, lock._release_save)
+ lock.acquire()
+ lock.acquire()
+ lock.release()
+ lock.acquire()
+ lock.release()
+ lock.release()
self.assertRaises(RuntimeError, lock._release_save)
def test_different_thread(self):
diff --git a/Lib/test/test_pkg.py b/Lib/test/test_pkg.py
--- a/Lib/test/test_pkg.py
+++ b/Lib/test/test_pkg.py
@@ -23,6 +23,8 @@
def fixdir(lst):
if "__builtins__" in lst:
lst.remove("__builtins__")
+ if "__initializing__" in lst:
+ lst.remove("__initializing__")
return lst
diff --git a/Lib/test/test_threaded_import.py b/Lib/test/test_threaded_import.py
--- a/Lib/test/test_threaded_import.py
+++ b/Lib/test/test_threaded_import.py
@@ -12,7 +12,7 @@
import shutil
import unittest
from test.support import (
- verbose, import_module, run_unittest, TESTFN, reap_threads)
+ verbose, import_module, run_unittest, TESTFN, reap_threads, forget)
threading = import_module('threading')
def task(N, done, done_tasks, errors):
@@ -187,7 +187,7 @@
contents = contents % {'delay': delay}
with open(os.path.join(TESTFN, name + ".py"), "wb") as f:
f.write(contents.encode('utf-8'))
- self.addCleanup(sys.modules.pop, name, None)
+ self.addCleanup(forget, name)
results = []
def import_ab():
@@ -204,6 +204,21 @@
t2.join()
self.assertEqual(set(results), {'a', 'b'})
+ def test_side_effect_import(self):
+ code = """if 1:
+ import threading
+ def target():
+ import random
+ t = threading.Thread(target=target)
+ t.start()
+ t.join()"""
+ sys.path.insert(0, os.curdir)
+ self.addCleanup(sys.path.remove, os.curdir)
+ with open(TESTFN + ".py", "wb") as f:
+ f.write(code.encode('utf-8'))
+ self.addCleanup(forget, TESTFN)
+ __import__(TESTFN)
+
@reap_threads
def test_main():
diff --git a/Lib/token.py b/Lib/token.py
--- a/Lib/token.py
+++ b/Lib/token.py
@@ -70,7 +70,7 @@
tok_name = {value: name
for name, value in globals().items()
- if isinstance(value, int)}
+ if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())
def ISTERMINAL(x):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
Core and Builtins
-----------------
+- Issue #9260: A finer-grained import lock. Most of the import sequence
+ now uses per-module locks rather than the global import lock, eliminating
+ well-known issues with threads and imports.
+
- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
Patch by Serhiy Storchaka.
diff --git a/Python/import.c b/Python/import.c
--- a/Python/import.c
+++ b/Python/import.c
@@ -1370,47 +1370,7 @@
PyObject *
PyImport_ImportModuleNoBlock(const char *name)
{
- PyObject *nameobj, *modules, *result;
-#ifdef WITH_THREAD
- long me;
-#endif
-
- /* Try to get the module from sys.modules[name] */
- modules = PyImport_GetModuleDict();
- if (modules == NULL)
- return NULL;
-
- nameobj = PyUnicode_FromString(name);
- if (nameobj == NULL)
- return NULL;
- result = PyDict_GetItem(modules, nameobj);
- if (result != NULL) {
- Py_DECREF(nameobj);
- Py_INCREF(result);
- return result;
- }
- PyErr_Clear();
-#ifdef WITH_THREAD
- /* check the import lock
- * me might be -1 but I ignore the error here, the lock function
- * takes care of the problem */
- me = PyThread_get_thread_ident();
- if (import_lock_thread == -1 || import_lock_thread == me) {
- /* no thread or me is holding the lock */
- result = PyImport_Import(nameobj);
- }
- else {
- PyErr_Format(PyExc_ImportError,
- "Failed to import %R because the import lock"
- "is held by another thread.",
- nameobj);
- result = NULL;
- }
-#else
- result = PyImport_Import(nameobj);
-#endif
- Py_DECREF(nameobj);
- return result;
+ return PyImport_ImportModule(name);
}
@@ -1420,11 +1380,13 @@
int level)
{
_Py_IDENTIFIER(__import__);
+ _Py_IDENTIFIER(__initializing__);
_Py_IDENTIFIER(__package__);
_Py_IDENTIFIER(__path__);
_Py_IDENTIFIER(__name__);
_Py_IDENTIFIER(_find_and_load);
_Py_IDENTIFIER(_handle_fromlist);
+ _Py_IDENTIFIER(_lock_unlock_module);
_Py_static_string(single_dot, ".");
PyObject *abs_name = NULL;
PyObject *builtins_import = NULL;
@@ -1607,16 +1569,48 @@
goto error_with_unlock;
}
else if (mod != NULL) {
+ PyObject *value;
+ int initializing = 0;
+
Py_INCREF(mod);
+ /* Only call _bootstrap._lock_unlock_module() if __initializing__ is true. */
+ value = _PyObject_GetAttrId(mod, &PyId___initializing__);
+ if (value == NULL)
+ PyErr_Clear();
+ else {
+ initializing = PyObject_IsTrue(value);
+ Py_DECREF(value);
+ if (initializing == -1)
+ PyErr_Clear();
+ }
+ if (initializing > 0) {
+ /* _bootstrap._lock_unlock_module() releases the import lock */
+ value = _PyObject_CallMethodObjIdArgs(interp->importlib,
+ &PyId__lock_unlock_module, abs_name,
+ NULL);
+ if (value == NULL)
+ goto error;
+ Py_DECREF(value);
+ }
+ else {
+#ifdef WITH_THREAD
+ if (_PyImport_ReleaseLock() < 0) {
+ PyErr_SetString(PyExc_RuntimeError, "not holding the import lock");
+ goto error;
+ }
+#endif
+ }
}
else {
+ /* _bootstrap._find_and_load() releases the import lock */
mod = _PyObject_CallMethodObjIdArgs(interp->importlib,
&PyId__find_and_load, abs_name,
builtins_import, NULL);
if (mod == NULL) {
- goto error_with_unlock;
+ goto error;
}
}
+ /* From now on we don't hold the import lock anymore. */
if (PyObject_Not(fromlist)) {
if (level == 0 || PyUnicode_GET_LENGTH(name) > 0) {
@@ -1625,12 +1619,12 @@
PyObject *borrowed_dot = _PyUnicode_FromId(&single_dot);
if (borrowed_dot == NULL) {
- goto error_with_unlock;
+ goto error;
}
partition = PyUnicode_Partition(name, borrowed_dot);
if (partition == NULL) {
- goto error_with_unlock;
+ goto error;
}
if (PyUnicode_GET_LENGTH(PyTuple_GET_ITEM(partition, 1)) == 0) {
@@ -1638,7 +1632,7 @@
Py_DECREF(partition);
final_mod = mod;
Py_INCREF(mod);
- goto exit_with_unlock;
+ goto error;
}
front = PyTuple_GET_ITEM(partition, 0);
@@ -1657,7 +1651,7 @@
abs_name_len - cut_off);
Py_DECREF(front);
if (to_return == NULL) {
- goto error_with_unlock;
+ goto error;
}
final_mod = PyDict_GetItem(interp->modules, to_return);
@@ -1683,8 +1677,8 @@
fromlist, builtins_import,
NULL);
}
+ goto error;
- exit_with_unlock:
error_with_unlock:
#ifdef WITH_THREAD
if (_PyImport_ReleaseLock() < 0) {
diff --git a/Python/importlib.h b/Python/importlib.h
index cf5619a6c4b0587815b87145eae5bf212ec7e5f1..493cd1aac17510f62321955f9d7a45836ef23a7d
GIT binary patch
[stripped]
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list