[Python-checkins] gh-92301: subprocess: Prefer close_range() to procfs-based fd closing (#92303)

gpshead webhook-mailer at python.org
Thu May 5 12:46:24 EDT 2022


https://github.com/python/cpython/commit/58573ffba0e4d3c7d6e6712169578e45d2926dbd
commit: 58573ffba0e4d3c7d6e6712169578e45d2926dbd
branch: main
author: Alexey Izbyshev <izbyshev at ispras.ru>
committer: gpshead <greg at krypto.org>
date: 2022-05-05T09:46:19-07:00
summary:

gh-92301: subprocess: Prefer close_range() to procfs-based fd closing (#92303)

#92301: subprocess: Prefer `close_range()` to procfs-based fd closing.

`close_range()` is much faster for large number of file descriptors, e.g.
4 times faster for 1000 descriptors in a Linux 5.16-based environment.

We prefer close_range() only if it's known to be async-signal-safe.

files:
A Misc/NEWS.d/next/Library/2022-05-04-11-54-37.gh-issue-92301.eqjoYX.rst
M Modules/_posixsubprocess.c

diff --git a/Misc/NEWS.d/next/Library/2022-05-04-11-54-37.gh-issue-92301.eqjoYX.rst b/Misc/NEWS.d/next/Library/2022-05-04-11-54-37.gh-issue-92301.eqjoYX.rst
new file mode 100644
index 0000000000000..b0b0502bf0a75
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-05-04-11-54-37.gh-issue-92301.eqjoYX.rst
@@ -0,0 +1,2 @@
+Prefer ``close_range()`` to iterating over procfs for file descriptor
+closing in :mod:`subprocess` for better performance.
diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c
index 2440609e31bcc..21c2bd13a3e27 100644
--- a/Modules/_posixsubprocess.c
+++ b/Modules/_posixsubprocess.c
@@ -210,18 +210,23 @@ safe_get_max_fd(void)
 }
 
 
-/* Close all file descriptors in the range from start_fd and higher
- * except for those in py_fds_to_keep.  If the range defined by
- * [start_fd, safe_get_max_fd()) is large this will take a long
- * time as it calls close() on EVERY possible fd.
+/* Close all file descriptors in the given range except for those in
+ * py_fds_to_keep by invoking closer on each subrange.
  *
- * It isn't possible to know for sure what the max fd to go up to
- * is for processes with the capability of raising their maximum.
+ * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
+ * possible to know for sure what the max fd to go up to is for
+ * processes with the capability of raising their maximum, or in case
+ * a process opened a high fd and then lowered its maximum.
  */
-static void
-_close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
+static int
+_close_range_except(int start_fd,
+                    int end_fd,
+                    PyObject *py_fds_to_keep,
+                    int (*closer)(int, int))
 {
-    long end_fd = safe_get_max_fd();
+    if (end_fd == -1) {
+        end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
+    }
     Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
     Py_ssize_t keep_seq_idx;
     /* As py_fds_to_keep is sorted we can loop through the list closing
@@ -231,15 +236,17 @@ _close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
         int keep_fd = PyLong_AsLong(py_keep_fd);
         if (keep_fd < start_fd)
             continue;
-        _Py_closerange(start_fd, keep_fd - 1);
+        if (closer(start_fd, keep_fd - 1) != 0)
+            return -1;
         start_fd = keep_fd + 1;
     }
     if (start_fd <= end_fd) {
-        _Py_closerange(start_fd, end_fd);
+        if (closer(start_fd, end_fd) != 0)
+            return -1;
     }
+    return 0;
 }
 
-
 #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
 /* It doesn't matter if d_name has room for NAME_MAX chars; we're using this
  * only to read a directory of short file descriptor number names.  The kernel
@@ -255,6 +262,16 @@ struct linux_dirent64 {
    char           d_name[256];  /* Filename (null-terminated) */
 };
 
+static int
+_brute_force_closer(int first, int last)
+{
+    for (int i = first; i <= last; i++) {
+        /* Ignore errors */
+        (void)close(i);
+    }
+    return 0;
+}
+
 /* Close all open file descriptors in the range from start_fd and higher
  * Do not close any in the sorted py_fds_to_keep list.
  *
@@ -278,7 +295,7 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
     fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
     if (fd_dir_fd == -1) {
         /* No way to get a list of open fds. */
-        _close_fds_by_brute_force(start_fd, py_fds_to_keep);
+        _close_range_except(start_fd, -1, py_fds_to_keep, _brute_force_closer);
         return;
     } else {
         char buffer[sizeof(struct linux_dirent64)];
@@ -306,10 +323,16 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
     }
 }
 
-#define _close_open_fds _close_open_fds_safe
+#define _close_open_fds_fallback _close_open_fds_safe
 
 #else  /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
 
+static int
+_unsafe_closer(int first, int last)
+{
+    _Py_closerange(first, last);
+    return 0;
+}
 
 /* Close all open file descriptors from start_fd and higher.
  * Do not close any in the sorted py_fds_to_keep tuple.
@@ -325,7 +348,7 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
  *   http://womble.decadent.org.uk/readdir_r-advisory.html
  */
 static void
-_close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
+_close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
 {
     DIR *proc_fd_dir;
 #ifndef HAVE_DIRFD
@@ -348,7 +371,7 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
         proc_fd_dir = opendir(FD_DIR);
     if (!proc_fd_dir) {
         /* No way to get a list of open fds. */
-        _close_fds_by_brute_force(start_fd, py_fds_to_keep);
+        _close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
     } else {
         struct dirent *dir_entry;
 #ifdef HAVE_DIRFD
@@ -369,16 +392,45 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
         }
         if (errno) {
             /* readdir error, revert behavior. Highly Unlikely. */
-            _close_fds_by_brute_force(start_fd, py_fds_to_keep);
+            _close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
         }
         closedir(proc_fd_dir);
     }
 }
 
-#define _close_open_fds _close_open_fds_maybe_unsafe
+#define _close_open_fds_fallback _close_open_fds_maybe_unsafe
 
 #endif  /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
 
+/* We can use close_range() library function only if it's known to be
+ * async-signal-safe.
+ *
+ * On Linux, glibc explicitly documents it to be a thin wrapper over
+ * the system call, and other C libraries are likely to follow glibc.
+ */
+#if defined(HAVE_CLOSE_RANGE) && \
+    (defined(__linux__) || defined(__FreeBSD__))
+#define HAVE_ASYNC_SAFE_CLOSE_RANGE
+
+static int
+_close_range_closer(int first, int last)
+{
+    return close_range(first, last, 0);
+}
+#endif
+
+static void
+_close_open_fds(int start_fd, PyObject* py_fds_to_keep)
+{
+#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
+    if (_close_range_except(
+            start_fd, INT_MAX, py_fds_to_keep,
+            _close_range_closer) == 0) {
+        return;
+    }
+#endif
+    _close_open_fds_fallback(start_fd, py_fds_to_keep);
+}
 
 #ifdef VFORK_USABLE
 /* Reset dispositions for all signals to SIG_DFL except for ignored



More information about the Python-checkins mailing list