[Python-checkins] bpo-37531: Fix regrtest timeout for subprocesses (GH-15072)

Victor Stinner webhook-mailer at python.org
Wed Aug 14 08:18:55 EDT 2019


https://github.com/python/cpython/commit/b0c8369c603633f445ccbb5ca7a8742145ff9eec
commit: b0c8369c603633f445ccbb5ca7a8742145ff9eec
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2019-08-14T14:18:51+02:00
summary:

bpo-37531: Fix regrtest timeout for subprocesses (GH-15072)

Co-Authored-By: Joannah Nanjekye <joannah.nanjekye at ibm.com>

files:
A Misc/NEWS.d/next/Library/2019-07-09-19-38-26.bpo-37531.GX7s8S.rst
M Lib/test/libregrtest/main.py
M Lib/test/libregrtest/runtest.py
M Lib/test/libregrtest/runtest_mp.py
M Lib/test/test_regrtest.py

diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index 87278d0c76a2..3cfbb4017dfb 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -14,7 +14,7 @@
 from test.libregrtest.runtest import (
     findtests, runtest, get_abs_module,
     STDTESTS, NOTTESTS, PASSED, FAILED, ENV_CHANGED, SKIPPED, RESOURCE_DENIED,
-    INTERRUPTED, CHILD_ERROR, TEST_DID_NOT_RUN,
+    INTERRUPTED, CHILD_ERROR, TEST_DID_NOT_RUN, TIMEOUT,
     PROGRESS_MIN_TIME, format_test_result, is_failed)
 from test.libregrtest.setup import setup_tests
 from test.libregrtest.pgo import setup_pgo_tests
@@ -115,6 +115,8 @@ def accumulate_result(self, result, rerun=False):
             self.run_no_tests.append(test_name)
         elif ok == INTERRUPTED:
             self.interrupted = True
+        elif ok == TIMEOUT:
+            self.bad.append(test_name)
         else:
             raise ValueError("invalid test result: %r" % ok)
 
diff --git a/Lib/test/libregrtest/runtest.py b/Lib/test/libregrtest/runtest.py
index a43b7666cd1e..e7dce180cb37 100644
--- a/Lib/test/libregrtest/runtest.py
+++ b/Lib/test/libregrtest/runtest.py
@@ -13,7 +13,7 @@
 from test import support
 from test.libregrtest.refleak import dash_R, clear_caches
 from test.libregrtest.save_env import saved_test_environment
-from test.libregrtest.utils import print_warning
+from test.libregrtest.utils import format_duration, print_warning
 
 
 # Test result constants.
@@ -25,6 +25,7 @@
 INTERRUPTED = -4
 CHILD_ERROR = -5   # error in a child process
 TEST_DID_NOT_RUN = -6
+TIMEOUT = -7
 
 _FORMAT_TEST_RESULT = {
     PASSED: '%s passed',
@@ -35,6 +36,7 @@
     INTERRUPTED: '%s interrupted',
     CHILD_ERROR: '%s crashed',
     TEST_DID_NOT_RUN: '%s run no tests',
+    TIMEOUT: '%s timed out',
 }
 
 # Minimum duration of a test to display its duration or to mention that
@@ -75,7 +77,10 @@ def is_failed(result, ns):
 
 def format_test_result(result):
     fmt = _FORMAT_TEST_RESULT.get(result.result, "%s")
-    return fmt % result.test_name
+    text = fmt % result.test_name
+    if result.result == TIMEOUT:
+        text = '%s (%s)' % (text, format_duration(result.test_time))
+    return text
 
 
 def findtestdir(path=None):
@@ -179,6 +184,7 @@ def runtest(ns, test_name):
         FAILED           test failed
         PASSED           test passed
         EMPTY_TEST_SUITE test ran no subtests.
+        TIMEOUT          test timed out.
 
     If ns.xmlpath is not None, xml_data is a list containing each
     generated testsuite element.
diff --git a/Lib/test/libregrtest/runtest_mp.py b/Lib/test/libregrtest/runtest_mp.py
index aa2409b4ef79..eed643291162 100644
--- a/Lib/test/libregrtest/runtest_mp.py
+++ b/Lib/test/libregrtest/runtest_mp.py
@@ -13,7 +13,7 @@
 
 from test.libregrtest.runtest import (
     runtest, INTERRUPTED, CHILD_ERROR, PROGRESS_MIN_TIME,
-    format_test_result, TestResult, is_failed)
+    format_test_result, TestResult, is_failed, TIMEOUT)
 from test.libregrtest.setup import setup_tests
 from test.libregrtest.utils import format_duration
 
@@ -103,11 +103,12 @@ class ExitThread(Exception):
 
 
 class MultiprocessThread(threading.Thread):
-    def __init__(self, pending, output, ns):
+    def __init__(self, pending, output, ns, timeout):
         super().__init__()
         self.pending = pending
         self.output = output
         self.ns = ns
+        self.timeout = timeout
         self.current_test_name = None
         self.start_time = None
         self._popen = None
@@ -126,6 +127,12 @@ def __repr__(self):
         return '<%s>' % ' '.join(info)
 
     def kill(self):
+        """
+        Kill the current process (if any).
+
+        This method can be called by the thread running the process,
+        or by another thread.
+        """
         self._killed = True
 
         popen = self._popen
@@ -136,6 +143,13 @@ def kill(self):
         # does not hang
         popen.stdout.close()
         popen.stderr.close()
+        popen.wait()
+
+    def mp_result_error(self, test_name, error_type, stdout='', stderr='',
+                        err_msg=None):
+        test_time = time.monotonic() - self.start_time
+        result = TestResult(test_name, error_type, test_time, None)
+        return MultiprocessResult(result, stdout, stderr, err_msg)
 
     def _runtest(self, test_name):
         try:
@@ -154,7 +168,19 @@ def _runtest(self, test_name):
                         raise ExitThread
 
                     try:
+                        stdout, stderr = popen.communicate(timeout=self.timeout)
+                    except subprocess.TimeoutExpired:
+                        if self._killed:
+                            # kill() has been called: communicate() fails
+                            # on reading closed stdout/stderr
+                            raise ExitThread
+
+                        popen.kill()
                         stdout, stderr = popen.communicate()
+                        self.kill()
+
+                        return self.mp_result_error(test_name, TIMEOUT,
+                                                    stdout, stderr)
                     except OSError:
                         if self._killed:
                             # kill() has been called: communicate() fails
@@ -163,7 +189,6 @@ def _runtest(self, test_name):
                         raise
                 except:
                     self.kill()
-                    popen.wait()
                     raise
 
             retcode = popen.wait()
@@ -191,8 +216,7 @@ def _runtest(self, test_name):
                     err_msg = "Failed to parse worker JSON: %s" % exc
 
         if err_msg is not None:
-            test_time = time.monotonic() - self.start_time
-            result = TestResult(test_name, CHILD_ERROR, test_time, None)
+            return self.mp_result_error(test_name, CHILD_ERROR, stdout, stderr, err_msg)
 
         return MultiprocessResult(result, stdout, stderr, err_msg)
 
@@ -236,13 +260,16 @@ def __init__(self, regrtest):
         self.output = queue.Queue()
         self.pending = MultiprocessIterator(self.regrtest.tests)
         if self.ns.timeout is not None:
-            self.test_timeout = self.ns.timeout * 1.5
+            self.worker_timeout = self.ns.timeout * 1.5
+            self.main_timeout = self.ns.timeout * 2.0
         else:
-            self.test_timeout = None
+            self.worker_timeout = None
+            self.main_timeout = None
         self.workers = None
 
     def start_workers(self):
-        self.workers = [MultiprocessThread(self.pending, self.output, self.ns)
+        self.workers = [MultiprocessThread(self.pending, self.output,
+                                           self.ns, self.worker_timeout)
                         for _ in range(self.ns.use_mp)]
         print("Run tests in parallel using %s child processes"
               % len(self.workers))
@@ -274,8 +301,8 @@ def _get_result(self):
                 return None
 
         while True:
-            if self.test_timeout is not None:
-                faulthandler.dump_traceback_later(self.test_timeout, exit=True)
+            if self.main_timeout is not None:
+                faulthandler.dump_traceback_later(self.main_timeout, exit=True)
 
             # wait for a thread
             timeout = max(PROGRESS_UPDATE, PROGRESS_MIN_TIME)
@@ -343,7 +370,7 @@ def run_tests(self):
             print()
             self.regrtest.interrupted = True
         finally:
-            if self.test_timeout is not None:
+            if self.main_timeout is not None:
                 faulthandler.cancel_dump_traceback_later()
 
         # a test failed (and --failfast is set) or all tests completed
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index fdab8c8ef5d1..931f125eb8f9 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -1154,6 +1154,33 @@ def test_garbage(self):
                                   env_changed=[testname],
                                   fail_env_changed=True)
 
+    def test_multiprocessing_timeout(self):
+        code = textwrap.dedent(r"""
+            import time
+            import unittest
+            try:
+                import faulthandler
+            except ImportError:
+                faulthandler = None
+
+            class Tests(unittest.TestCase):
+                # test hangs and so should be stopped by the timeout
+                def test_sleep(self):
+                    # we want to test regrtest multiprocessing timeout,
+                    # not faulthandler timeout
+                    if faulthandler is not None:
+                        faulthandler.cancel_dump_traceback_later()
+
+                    time.sleep(60 * 5)
+        """)
+        testname = self.create_test(code=code)
+
+        output = self.run_tests("-j2", "--timeout=1.0", testname, exitcode=2)
+        self.check_executed_tests(output, [testname],
+                                  failed=testname)
+        self.assertRegex(output,
+                         re.compile('%s timed out' % testname, re.MULTILINE))
+
     def test_unraisable_exc(self):
         # --fail-env-changed must catch unraisable exception
         code = textwrap.dedent(r"""
diff --git a/Misc/NEWS.d/next/Library/2019-07-09-19-38-26.bpo-37531.GX7s8S.rst b/Misc/NEWS.d/next/Library/2019-07-09-19-38-26.bpo-37531.GX7s8S.rst
new file mode 100644
index 000000000000..aaf1052bd3c1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-07-09-19-38-26.bpo-37531.GX7s8S.rst
@@ -0,0 +1,2 @@
+"python3 -m test -jN --timeout=TIMEOUT" now kills a worker process if it runs
+longer than *TIMEOUT* seconds.



More information about the Python-checkins mailing list