[Python-checkins] gh-104372: Cleanup _posixsubprocess `make_inheritable` for async signal safety and no GIL requirement (#104518)

gpshead webhook-mailer at python.org
Wed May 17 12:00:04 EDT 2023


https://github.com/python/cpython/commit/c649df63e0d052044a4660101d5769ff46ae9234
commit: c649df63e0d052044a4660101d5769ff46ae9234
branch: main
author: Gregory P. Smith <greg at krypto.org>
committer: gpshead <greg at krypto.org>
date: 2023-05-17T08:59:45-07:00
summary:

gh-104372: Cleanup _posixsubprocess `make_inheritable` for async signal safety and no GIL requirement (#104518)

Move all of the Python C API calls into the parent process up front
instead of doing PyLong_AsLong and PyErr_Occurred and PyTuple_GET from
the post-fork/vfork child process.

Much of this was long overdue. We shouldn't have been using PyTuple and
PyLong APIs within all of these low level functions anyways.

files:
A Misc/NEWS.d/next/Library/2023-05-17-08-01-36.gh-issue-104372.jpoWs6.rst
M Modules/_posixsubprocess.c

diff --git a/Misc/NEWS.d/next/Library/2023-05-17-08-01-36.gh-issue-104372.jpoWs6.rst b/Misc/NEWS.d/next/Library/2023-05-17-08-01-36.gh-issue-104372.jpoWs6.rst
new file mode 100644
index 000000000000..c228f503aab4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-17-08-01-36.gh-issue-104372.jpoWs6.rst
@@ -0,0 +1 @@
+Refactored the ``_posixsubprocess`` internals to avoid Python C API usage between fork and exec when marking ``pass_fds=`` file descriptors inheritable.
diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c
index 2bf83db0e228..75965d338d59 100644
--- a/Modules/_posixsubprocess.c
+++ b/Modules/_posixsubprocess.c
@@ -160,16 +160,17 @@ _sanity_check_python_fd_sequence(PyObject *fd_sequence)
 
 /* Is fd found in the sorted Python Sequence? */
 static int
-_is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
+_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
+                             Py_ssize_t fd_sequence_len)
 {
     /* Binary search. */
     Py_ssize_t search_min = 0;
-    Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1;
+    Py_ssize_t search_max = fd_sequence_len - 1;
     if (search_max < 0)
         return 0;
     do {
         long middle = (search_min + search_max) / 2;
-        long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle));
+        long middle_fd = fd_sequence[middle];
         if (fd == middle_fd)
             return 1;
         if (fd > middle_fd)
@@ -180,8 +181,18 @@ _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
     return 0;
 }
 
+/*
+ * Do all the Python C API calls in the parent process to turn the pass_fds
+ * "py_fds_to_keep" tuple into a C array.  The caller owns allocation and
+ * freeing of the array.
+ *
+ * On error an unknown number of array elements may have been filled in.
+ * A Python exception has been set when an error is returned.
+ *
+ * Returns: -1 on error, 0 on success.
+ */
 static int
-make_inheritable(PyObject *py_fds_to_keep, int errpipe_write)
+convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
 {
     Py_ssize_t i, len;
 
@@ -189,15 +200,37 @@ make_inheritable(PyObject *py_fds_to_keep, int errpipe_write)
     for (i = 0; i < len; ++i) {
         PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
         long fd = PyLong_AsLong(fdobj);
-        assert(!PyErr_Occurred());
-        assert(0 <= fd && fd <= INT_MAX);
+        if (PyErr_Occurred()) {
+            return -1;
+        }
+        if (fd < 0 || fd > INT_MAX) {
+            PyErr_SetString(PyExc_ValueError,
+                            "fd out of range in fds_to_keep.");
+            return -1;
+        }
+        c_fds_to_keep[i] = (int)fd;
+    }
+    return 0;
+}
+
+
+/* This function must be async-signal-safe as it is called from child_exec()
+ * after fork() or vfork().
+ */
+static int
+make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
+{
+    Py_ssize_t i;
+
+    for (i = 0; i < len; ++i) {
+        int fd = c_fds_to_keep[i];
         if (fd == errpipe_write) {
-            /* errpipe_write is part of py_fds_to_keep. It must be closed at
+            /* errpipe_write is part of fds_to_keep. It must be closed at
                exec(), but kept open in the child process until exec() is
                called. */
             continue;
         }
-        if (_Py_set_inheritable_async_safe((int)fd, 1, NULL) < 0)
+        if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
             return -1;
     }
     return 0;
@@ -233,7 +266,7 @@ safe_get_max_fd(void)
 
 
 /* Close all file descriptors in the given range except for those in
- * py_fds_to_keep by invoking closer on each subrange.
+ * fds_to_keep by invoking closer on each subrange.
  *
  * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
  * possible to know for sure what the max fd to go up to is for
@@ -243,19 +276,18 @@ safe_get_max_fd(void)
 static int
 _close_range_except(int start_fd,
                     int end_fd,
-                    PyObject *py_fds_to_keep,
+                    int *fds_to_keep,
+                    Py_ssize_t fds_to_keep_len,
                     int (*closer)(int, int))
 {
     if (end_fd == -1) {
         end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
     }
-    Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
     Py_ssize_t keep_seq_idx;
-    /* As py_fds_to_keep is sorted we can loop through the list closing
+    /* As fds_to_keep is sorted we can loop through the list closing
      * fds in between any in the keep list falling within our range. */
-    for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
-        PyObject* py_keep_fd = PyTuple_GET_ITEM(py_fds_to_keep, keep_seq_idx);
-        int keep_fd = PyLong_AsLong(py_keep_fd);
+    for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
+        int keep_fd = fds_to_keep[keep_seq_idx];
         if (keep_fd < start_fd)
             continue;
         if (closer(start_fd, keep_fd - 1) != 0)
@@ -295,7 +327,7 @@ _brute_force_closer(int first, int last)
 }
 
 /* Close all open file descriptors in the range from start_fd and higher
- * Do not close any in the sorted py_fds_to_keep list.
+ * Do not close any in the sorted fds_to_keep list.
  *
  * This version is async signal safe as it does not make any unsafe C library
  * calls, malloc calls or handle any locks.  It is _unfortunate_ to be forced
@@ -310,14 +342,16 @@ _brute_force_closer(int first, int last)
  * it with some cpp #define magic to work on other OSes as well if you want.
  */
 static void
-_close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
+_close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
 {
     int fd_dir_fd;
 
     fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
     if (fd_dir_fd == -1) {
         /* No way to get a list of open fds. */
-        _close_range_except(start_fd, -1, py_fds_to_keep, _brute_force_closer);
+        _close_range_except(start_fd, -1,
+                            fds_to_keep, fds_to_keep_len,
+                            _brute_force_closer);
         return;
     } else {
         char buffer[sizeof(struct linux_dirent64)];
@@ -336,7 +370,8 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
                 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
                     continue;  /* Not a number. */
                 if (fd != fd_dir_fd && fd >= start_fd &&
-                    !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
+                    !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
+                                                  fds_to_keep_len)) {
                     close(fd);
                 }
             }
@@ -357,7 +392,7 @@ _unsafe_closer(int first, int last)
 }
 
 /* Close all open file descriptors from start_fd and higher.
- * Do not close any in the sorted py_fds_to_keep tuple.
+ * Do not close any in the sorted fds_to_keep tuple.
  *
  * This function violates the strict use of async signal safe functions. :(
  * It calls opendir(), readdir() and closedir().  Of these, the one most
@@ -370,11 +405,13 @@ _unsafe_closer(int first, int last)
  *   http://womble.decadent.org.uk/readdir_r-advisory.html
  */
 static void
-_close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
+_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
+                             Py_ssize_t fds_to_keep_len)
 {
     DIR *proc_fd_dir;
 #ifndef HAVE_DIRFD
-    while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
+    while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
+                                        fds_to_keep_len)) {
         ++start_fd;
     }
     /* Close our lowest fd before we call opendir so that it is likely to
@@ -393,7 +430,8 @@ _close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
         proc_fd_dir = opendir(FD_DIR);
     if (!proc_fd_dir) {
         /* No way to get a list of open fds. */
-        _close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
+        _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
+                            _unsafe_closer);
     } else {
         struct dirent *dir_entry;
 #ifdef HAVE_DIRFD
@@ -407,14 +445,16 @@ _close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
             if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
                 continue;  /* Not a number. */
             if (fd != fd_used_by_opendir && fd >= start_fd &&
-                !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
+                !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
+                                              fds_to_keep_len)) {
                 close(fd);
             }
             errno = 0;
         }
         if (errno) {
             /* readdir error, revert behavior. Highly Unlikely. */
-            _close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
+            _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
+                                _unsafe_closer);
         }
         closedir(proc_fd_dir);
     }
@@ -442,16 +482,16 @@ _close_range_closer(int first, int last)
 #endif
 
 static void
-_close_open_fds(int start_fd, PyObject* py_fds_to_keep)
+_close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
 {
 #ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
     if (_close_range_except(
-            start_fd, INT_MAX, py_fds_to_keep,
+            start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
             _close_range_closer) == 0) {
         return;
     }
 #endif
-    _close_open_fds_fallback(start_fd, py_fds_to_keep);
+    _close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
 }
 
 #ifdef VFORK_USABLE
@@ -544,7 +584,7 @@ child_exec(char *const exec_array[],
            Py_ssize_t extra_group_size, const gid_t *extra_groups,
            uid_t uid, int child_umask,
            const void *child_sigmask,
-           PyObject *py_fds_to_keep,
+           int *fds_to_keep, Py_ssize_t fds_to_keep_len,
            PyObject *preexec_fn,
            PyObject *preexec_fn_args_tuple)
 {
@@ -554,7 +594,7 @@ child_exec(char *const exec_array[],
     /* Buffer large enough to hold a hex integer.  We can't malloc. */
     char hex_errno[sizeof(saved_errno)*2+1];
 
-    if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
+    if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
         goto error;
 
     /* Close parent's pipe ends. */
@@ -676,7 +716,7 @@ child_exec(char *const exec_array[],
     /* close FDs after executing preexec_fn, which might open FDs */
     if (close_fds) {
         /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
-        _close_open_fds(3, py_fds_to_keep);
+        _close_open_fds(3, fds_to_keep, fds_to_keep_len);
     }
 
     /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
@@ -750,7 +790,7 @@ do_fork_exec(char *const exec_array[],
              Py_ssize_t extra_group_size, const gid_t *extra_groups,
              uid_t uid, int child_umask,
              const void *child_sigmask,
-             PyObject *py_fds_to_keep,
+             int *fds_to_keep, Py_ssize_t fds_to_keep_len,
              PyObject *preexec_fn,
              PyObject *preexec_fn_args_tuple)
 {
@@ -801,7 +841,8 @@ do_fork_exec(char *const exec_array[],
                close_fds, restore_signals, call_setsid, pgid_to_set,
                gid, extra_group_size, extra_groups,
                uid, child_umask, child_sigmask,
-               py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
+               fds_to_keep, fds_to_keep_len,
+               preexec_fn, preexec_fn_args_tuple);
     _exit(255);
     return 0;  /* Dead code to avoid a potential compiler warning. */
 }
@@ -881,6 +922,8 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
     Py_ssize_t extra_group_size = 0;
     int need_after_fork = 0;
     int saved_errno = 0;
+    int *c_fds_to_keep = NULL;
+    Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);
 
     PyInterpreterState *interp = PyInterpreterState_Get();
     if ((preexec_fn != Py_None) && (interp != PyInterpreterState_Main())) {
@@ -1031,6 +1074,15 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
 #endif /* HAVE_SETREUID */
     }
 
+    c_fds_to_keep = PyMem_RawMalloc(fds_to_keep_len * sizeof(int));
+    if (c_fds_to_keep == NULL) {
+        PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
+        goto cleanup;
+    }
+    if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
+        goto cleanup;
+    }
+
     /* This must be the last thing done before fork() because we do not
      * want to call PyOS_BeforeFork() if there is any chance of another
      * error leading to the cleanup: code without calling fork(). */
@@ -1073,7 +1125,8 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
                        close_fds, restore_signals, call_setsid, pgid_to_set,
                        gid, extra_group_size, extra_groups,
                        uid, child_umask, old_sigmask,
-                       py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
+                       c_fds_to_keep, fds_to_keep_len,
+                       preexec_fn, preexec_fn_args_tuple);
 
     /* Parent (original) process */
     if (pid == (pid_t)-1) {
@@ -1103,6 +1156,10 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
         PyOS_AfterFork_Parent();
 
 cleanup:
+    if (c_fds_to_keep != NULL) {
+        PyMem_RawFree(c_fds_to_keep);
+    }
+
     if (saved_errno != 0) {
         errno = saved_errno;
         /* We can't call this above as PyOS_AfterFork_Parent() calls back



More information about the Python-checkins mailing list