[issue12650] Subprocess leaks fd upon kill()

Charles-François Natali report at bugs.python.org
Thu Jul 28 23:30:25 CEST 2011


Charles-François Natali <neologix at free.fr> added the comment:

Alright.
I tested this on default, and couldn't reproduce the FD leak.
That turned out to be due to another bug, affecting only the code path
which calls the pure C _posixsubprocess module (which is the only
implementation left in 3.3, but 3.2 still has the old pure-Python version).
The code just forgets to set Popen._child_created to True after
fork(), so when Popen.__del__() gets called before the process has
exited, the object is not added to the _active list, and gets
deallocated immediately. While this accidentally "fixes" the FD leak,
it has another - worse - side effect: the process remains a zombie.

I'm thus attaching three patches, with tests:
- one for 2.7, which fixes the original problem (i.e. remove the
process from _active once exited, even if it got killed by a signal)
- one for default, which also sets _child_created to True after fork()
- another one for 3.2, which does the same thing as the one for
default (but the code is a little different because 3.2 has both
pure-Python and C implementations)

Reviews welcome!

----------
keywords: +patch
Added file: http://bugs.python.org/file22787/issue_12650_default.diff
Added file: http://bugs.python.org/file22788/issue_12650_27.diff
Added file: http://bugs.python.org/file22789/issue_12650_32.diff

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue12650>
_______________________________________
-------------- next part --------------
diff -r ccce01988603 Lib/subprocess.py
--- a/Lib/subprocess.py	Thu Jul 28 09:55:13 2011 -0700
+++ b/Lib/subprocess.py	Thu Jul 28 22:51:39 2011 +0200
@@ -424,12 +424,16 @@
 except:
     MAXFD = 256
 
+# This lists holds Popen instances for which the underlying process had not
+# exited at the time its __del__ method got called: those processes are wait()ed
+# for synchronously from _cleanup() when a new Popen object is created, to avoid
+# zombie processes.
 _active = []
 
 def _cleanup():
     for inst in _active[:]:
         res = inst._internal_poll(_deadstate=sys.maxsize)
-        if res is not None and res >= 0:
+        if res is not None:
             try:
                 _active.remove(inst)
             except ValueError:
@@ -1272,6 +1276,7 @@
                             errread, errwrite,
                             errpipe_read, errpipe_write,
                             restore_signals, start_new_session, preexec_fn)
+                    self._child_created = True
                 finally:
                     # be sure the FD is closed no matter what
                     os.close(errpipe_write)
diff -r ccce01988603 Lib/test/test_subprocess.py
--- a/Lib/test/test_subprocess.py	Thu Jul 28 09:55:13 2011 -0700
+++ b/Lib/test/test_subprocess.py	Thu Jul 28 22:51:39 2011 +0200
@@ -1622,6 +1622,63 @@
             if dir is not None:
                 os.rmdir(dir)
 
+    def test_zombie_fast_process_del(self):
+        # Issue #12650: on Unix, if Popen.__del__() was called before the
+        # process exited, it wouldn't be added to subprocess._active, and would
+        # remain a zombie.
+        # spawn a Popen, and delete its reference before it exits
+        p = subprocess.Popen([sys.executable, "-c",
+                              'import sys, time;'
+                              'time.sleep(0.2)'],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        ident = id(p)
+        pid = p.pid
+        del p
+        # check that p is in the active processes list
+        self.assertIn(ident, [id(o) for o in subprocess._active])
+
+        # sleep a little to let the process exit, and create a new Popen: this
+        # should trigger the wait() of p
+        time.sleep(1)
+        with self.assertRaises(EnvironmentError) as c:
+            with subprocess.Popen(['nonexisting_i_hope'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE) as proc:
+                pass
+        # p should have been wait()ed on, and removed from the _active list
+        self.assertRaises(OSError, os.waitpid, pid, 0)
+        self.assertNotIn(ident, [id(o) for o in subprocess._active])
+
+    def test_leak_fast_process_del_killed(self):
+        # Issue #12650: on Unix, if Popen.__del__() was called before the
+        # process exited, and the process got killed by a signal, it would never
+        # be removed from subprocess._active, which triggered a FD and memory
+        # leak.
+        # spawn a Popen, delete its reference and kill it
+        p = subprocess.Popen([sys.executable, "-c",
+                              'import time;'
+                              'time.sleep(3)'],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        ident = id(p)
+        pid = p.pid
+        del p
+        os.kill(pid, signal.SIGKILL)
+        # check that p is in the active processes list
+        self.assertIn(ident, [id(o) for o in subprocess._active])
+
+        # let some time for the process to exit, and create a new Popen: this
+        # should trigger the wait() of p
+        time.sleep(0.2)
+        with self.assertRaises(EnvironmentError) as c:
+            with subprocess.Popen(['nonexisting_i_hope'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE) as proc:
+                pass
+        # p should have been wait()ed on, and removed from the _active list
+        self.assertRaises(OSError, os.waitpid, pid, 0)
+        self.assertNotIn(ident, [id(o) for o in subprocess._active])
 
 @unittest.skipUnless(getattr(subprocess, '_has_poll', False),
                      "poll system call not supported")
-------------- next part --------------
diff -r f15442543e24 Lib/subprocess.py
--- a/Lib/subprocess.py	Thu Jul 28 22:30:27 2011 +0800
+++ b/Lib/subprocess.py	Thu Jul 28 23:15:12 2011 +0200
@@ -460,7 +460,7 @@
 def _cleanup():
     for inst in _active[:]:
         res = inst._internal_poll(_deadstate=sys.maxint)
-        if res is not None and res >= 0:
+        if res is not None:
             try:
                 _active.remove(inst)
             except ValueError:
diff -r f15442543e24 Lib/test/test_subprocess.py
--- a/Lib/test/test_subprocess.py	Thu Jul 28 22:30:27 2011 +0800
+++ b/Lib/test/test_subprocess.py	Thu Jul 28 23:15:12 2011 +0200
@@ -1080,6 +1080,63 @@
                          subprocess._eintr_retry_call(fake_os_func, 666))
         self.assertEqual([(256, 999), (666,), (666,)], record_calls)
 
+    def test_zombie_fast_process_del(self):
+        # Issue #12650: on Unix, if Popen.__del__() was called before the
+        # process exited, it wouldn't be added to subprocess._active, and would
+        # remain a zombie.
+        # spawn a Popen, and delete its reference before it exits
+        p = subprocess.Popen([sys.executable, "-c",
+                              'import sys, time;'
+                              'time.sleep(0.2)'],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        ident = id(p)
+        pid = p.pid
+        del p
+        # check that p is in the active processes list
+        self.assertIn(ident, [id(o) for o in subprocess._active])
+
+        # sleep a little to let the process exit, and create a new Popen: this
+        # should trigger the wait() of p
+        time.sleep(1)
+        with self.assertRaises(EnvironmentError) as c:
+            with subprocess.Popen(['nonexisting_i_hope'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE) as proc:
+                pass
+        # p should have been wait()ed on, and removed from the _active list
+        self.assertRaises(OSError, os.waitpid, pid, 0)
+        self.assertNotIn(ident, [id(o) for o in subprocess._active])
+
+    def test_leak_fast_process_del_killed(self):
+        # Issue #12650: on Unix, if Popen.__del__() was called before the
+        # process exited, and the process got killed by a signal, it would never
+        # be removed from subprocess._active, which triggered a FD and memory
+        # leak.
+        # spawn a Popen, delete its reference and kill it
+        p = subprocess.Popen([sys.executable, "-c",
+                              'import time;'
+                              'time.sleep(3)'],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        ident = id(p)
+        pid = p.pid
+        del p
+        os.kill(pid, signal.SIGKILL)
+        # check that p is in the active processes list
+        self.assertIn(ident, [id(o) for o in subprocess._active])
+
+        # let some time for the process to exit, and create a new Popen: this
+        # should trigger the wait() of p
+        time.sleep(0.2)
+        with self.assertRaises(EnvironmentError) as c:
+            with subprocess.Popen(['nonexisting_i_hope'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE) as proc:
+                pass
+        # p should have been wait()ed on, and removed from the _active list
+        self.assertRaises(OSError, os.waitpid, pid, 0)
+        self.assertNotIn(ident, [id(o) for o in subprocess._active])
 
 @unittest.skipUnless(mswindows, "mswindows only")
 class CommandsWithSpaces (BaseTestCase):
-------------- next part --------------
diff -r 3e26c9033306 Lib/subprocess.py
--- a/Lib/subprocess.py	Thu Jul 28 22:32:49 2011 +0800
+++ b/Lib/subprocess.py	Thu Jul 28 23:17:57 2011 +0200
@@ -429,12 +429,16 @@
 except:
     MAXFD = 256
 
+# This lists holds Popen instances for which the underlying process had not
+# exited at the time its __del__ method got called: those processes are wait()ed
+# for synchronously from _cleanup() when a new Popen object is created, to avoid
+# zombie processes.
 _active = []
 
 def _cleanup():
     for inst in _active[:]:
         res = inst._internal_poll(_deadstate=sys.maxsize)
-        if res is not None and res >= 0:
+        if res is not None:
             try:
                 _active.remove(inst)
             except ValueError:
@@ -1191,6 +1195,7 @@
                                 errread, errwrite,
                                 errpipe_read, errpipe_write,
                                 restore_signals, start_new_session, preexec_fn)
+                        self._child_created = True
                     else:
                         # Pure Python implementation: It is not thread safe.
                         # This implementation may deadlock in the child if your
diff -r 3e26c9033306 Lib/test/test_subprocess.py
--- a/Lib/test/test_subprocess.py	Thu Jul 28 22:32:49 2011 +0800
+++ b/Lib/test/test_subprocess.py	Thu Jul 28 23:17:57 2011 +0200
@@ -1525,6 +1525,63 @@
             if dir is not None:
                 os.rmdir(dir)
 
+    def test_zombie_fast_process_del(self):
+        # Issue #12650: on Unix, if Popen.__del__() was called before the
+        # process exited, it wouldn't be added to subprocess._active, and would
+        # remain a zombie.
+        # spawn a Popen, and delete its reference before it exits
+        p = subprocess.Popen([sys.executable, "-c",
+                              'import sys, time;'
+                              'time.sleep(0.2)'],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        ident = id(p)
+        pid = p.pid
+        del p
+        # check that p is in the active processes list
+        self.assertIn(ident, [id(o) for o in subprocess._active])
+
+        # sleep a little to let the process exit, and create a new Popen: this
+        # should trigger the wait() of p
+        time.sleep(1)
+        with self.assertRaises(EnvironmentError) as c:
+            with subprocess.Popen(['nonexisting_i_hope'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE) as proc:
+                pass
+        # p should have been wait()ed on, and removed from the _active list
+        self.assertRaises(OSError, os.waitpid, pid, 0)
+        self.assertNotIn(ident, [id(o) for o in subprocess._active])
+
+    def test_leak_fast_process_del_killed(self):
+        # Issue #12650: on Unix, if Popen.__del__() was called before the
+        # process exited, and the process got killed by a signal, it would never
+        # be removed from subprocess._active, which triggered a FD and memory
+        # leak.
+        # spawn a Popen, delete its reference and kill it
+        p = subprocess.Popen([sys.executable, "-c",
+                              'import time;'
+                              'time.sleep(3)'],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        ident = id(p)
+        pid = p.pid
+        del p
+        os.kill(pid, signal.SIGKILL)
+        # check that p is in the active processes list
+        self.assertIn(ident, [id(o) for o in subprocess._active])
+
+        # let some time for the process to exit, and create a new Popen: this
+        # should trigger the wait() of p
+        time.sleep(0.2)
+        with self.assertRaises(EnvironmentError) as c:
+            with subprocess.Popen(['nonexisting_i_hope'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE) as proc:
+                pass
+        # p should have been wait()ed on, and removed from the _active list
+        self.assertRaises(OSError, os.waitpid, pid, 0)
+        self.assertNotIn(ident, [id(o) for o in subprocess._active])
 
 @unittest.skipUnless(getattr(subprocess, '_has_poll', False),
                      "poll system call not supported")


More information about the Python-bugs-list mailing list