[Python-checkins] bpo-37531: Enhance regrtest multiprocess timeout (GH-15345)

Victor Stinner webhook-mailer at python.org
Wed Aug 21 05:59:24 EDT 2019


https://github.com/python/cpython/commit/de2d9eed8bc628533e1628b843cc4c7a5010f6e5
commit: de2d9eed8bc628533e1628b843cc4c7a5010f6e5
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2019-08-21T10:59:20+01:00
summary:

bpo-37531: Enhance regrtest multiprocess timeout (GH-15345)

* Write a message when killing a worker process
* Put a timeout on the second popen.communicate() call
  (after killing the process)
* Put a timeout on popen.wait() call
* Catch popen.kill() and popen.wait() exceptions

files:
A Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst
M Lib/test/libregrtest/runtest_mp.py

diff --git a/Lib/test/libregrtest/runtest_mp.py b/Lib/test/libregrtest/runtest_mp.py
index eed643291162..c22479b7976f 100644
--- a/Lib/test/libregrtest/runtest_mp.py
+++ b/Lib/test/libregrtest/runtest_mp.py
@@ -126,6 +126,38 @@ def __repr__(self):
             info.append(f'pid={popen.pid}')
         return '<%s>' % ' '.join(info)
 
+    def _kill(self):
+        dt = time.monotonic() - self.start_time
+
+        popen = self._popen
+        pid = popen.pid
+        print("Kill worker process %s running for %.1f sec" % (pid, dt),
+              file=sys.stderr, flush=True)
+
+        try:
+            popen.kill()
+            return True
+        except OSError as exc:
+            print("WARNING: Failed to kill worker process %s: %r" % (pid, exc),
+                  file=sys.stderr, flush=True)
+            return False
+
+    def _close_wait(self):
+        popen = self._popen
+
+        # stdout and stderr must be closed to ensure that communicate()
+        # does not hang
+        popen.stdout.close()
+        popen.stderr.close()
+
+        try:
+            popen.wait(JOIN_TIMEOUT)
+        except (subprocess.TimeoutExpired, OSError) as exc:
+            print("WARNING: Failed to wait for worker process %s "
+                  "completion (timeout=%.1f sec): %r"
+                  % (popen.pid, JOIN_TIMEOUT, exc),
+                  file=sys.stderr, flush=True)
+
     def kill(self):
         """
         Kill the current process (if any).
@@ -135,15 +167,13 @@ def kill(self):
         """
         self._killed = True
 
-        popen = self._popen
-        if popen is None:
+        if self._popen is None:
             return
-        popen.kill()
-        # stdout and stderr must be closed to ensure that communicate()
-        # does not hang
-        popen.stdout.close()
-        popen.stderr.close()
-        popen.wait()
+
+        if not self._kill():
+            return
+
+        self._close_wait()
 
     def mp_result_error(self, test_name, error_type, stdout='', stderr='',
                         err_msg=None):
@@ -151,6 +181,23 @@ def mp_result_error(self, test_name, error_type, stdout='', stderr='',
         result = TestResult(test_name, error_type, test_time, None)
         return MultiprocessResult(result, stdout, stderr, err_msg)
 
+    def _timedout(self, test_name):
+        self._kill()
+
+        stdout = sterr = ''
+        popen = self._popen
+        try:
+            stdout, stderr = popen.communicate(timeout=JOIN_TIMEOUT)
+        except (subprocess.TimeoutExpired, OSError) as exc:
+            print("WARNING: Failed to read worker process %s output "
+                  "(timeout=%.1f sec): %r"
+                  % (popen.pid, exc, timeout),
+                  file=sys.stderr, flush=True)
+
+        self._close_wait()
+
+        return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)
+
     def _runtest(self, test_name):
         try:
             self.start_time = time.monotonic()
@@ -158,7 +205,7 @@ def _runtest(self, test_name):
 
             self._popen = run_test_in_subprocess(test_name, self.ns)
             popen = self._popen
-            with popen:
+            try:
                 try:
                     if self._killed:
                         # If kill() has been called before self._popen is set,
@@ -175,12 +222,7 @@ def _runtest(self, test_name):
                             # on reading closed stdout/stderr
                             raise ExitThread
 
-                        popen.kill()
-                        stdout, stderr = popen.communicate()
-                        self.kill()
-
-                        return self.mp_result_error(test_name, TIMEOUT,
-                                                    stdout, stderr)
+                        return self._timedout(test_name)
                     except OSError:
                         if self._killed:
                             # kill() has been called: communicate() fails
@@ -190,8 +232,10 @@ def _runtest(self, test_name):
                 except:
                     self.kill()
                     raise
+            finally:
+                self._close_wait()
 
-            retcode = popen.wait()
+            retcode = popen.returncode
         finally:
             self.current_test_name = None
             self._popen = None
@@ -286,10 +330,11 @@ def wait_workers(self):
                 if not worker.is_alive():
                     break
                 dt = time.monotonic() - start_time
-                print("Wait for regrtest worker %r for %.1f sec" % (worker, dt))
+                print("Wait for regrtest worker %r for %.1f sec" % (worker, dt),
+                      flush=True)
                 if dt > JOIN_TIMEOUT:
                     print("Warning -- failed to join a regrtest worker %s"
-                          % worker)
+                          % worker, flush=True)
                     break
 
     def _get_result(self):
diff --git a/Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst b/Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst
new file mode 100644
index 000000000000..59500ce67a01
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst
@@ -0,0 +1,3 @@
+Enhance regrtest multiprocess timeout: write a message when killing a worker
+process, catch popen.kill() and popen.wait() exceptions, put a timeout on the
+second call to popen.communicate().



More information about the Python-checkins mailing list