Skip to content

bpo-35823: subprocess: Use vfork() instead of fork() on Linux when safe #11671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Use ``vfork()`` instead of ``fork()`` for :func:`subprocess.Popen` on Linux
to improve performance in cases where it is deemed safe.
224 changes: 197 additions & 27 deletions Modules/_posixsubprocess.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
# define SYS_getdents64 __NR_getdents64
#endif

#if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
# include <signal.h>
# define VFORK_USABLE 1
#endif

#if defined(__sun) && defined(__SVR4)
/* readdir64 is used to work around Solaris 9 bug 6395699. */
# define readdir readdir64
Expand Down Expand Up @@ -406,18 +412,82 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */


#ifdef VFORK_USABLE
/* Reset dispositions for all signals to SIG_DFL except for ignored
* signals. This way we ensure that no signal handlers can run
* after we unblock signals in a child created by vfork().
*/
static void
reset_signal_handlers(const sigset_t *child_sigmask)
{
struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
for (int sig = 1; sig < _NSIG; sig++) {
/* Dispositions for SIGKILL and SIGSTOP can't be changed. */
if (sig == SIGKILL || sig == SIGSTOP) {
continue;
}

/* There is no need to reset the disposition of signals that will
* remain blocked across execve() since the kernel will do it. */
if (sigismember(child_sigmask, sig) == 1) {
continue;
}

struct sigaction sa;
/* C libraries usually return EINVAL for signals used
* internally (e.g. for thread cancellation), so simply
* skip errors here. */
if (sigaction(sig, NULL, &sa) == -1) {
continue;
}

/* void *h works as these fields are both pointer types already. */
void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
(void *)sa.sa_handler);
if (h == SIG_IGN || h == SIG_DFL) {
continue;
}

/* This call can't reasonably fail, but if it does, terminating
* the child seems to be too harsh, so ignore errors. */
(void) sigaction(sig, &sa_dfl, NULL);
}
}
#endif /* VFORK_USABLE */


/*
* This function is code executed in the child process immediately after fork
* to set things up and call exec().
* This function is code executed in the child process immediately after
* (v)fork to set things up and call exec().
*
* All of the code in this function must only use async-signal-safe functions,
* listed at `man 7 signal` or
* http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
*
* This restriction is documented at
* http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
*
* If this function is called after vfork(), even more care must be taken.
* The lack of preparations that C libraries normally take on fork(),
* as well as sharing the address space with the parent, might make even
* async-signal-safe functions vfork-unsafe. In particular, on Linux,
* set*id() and setgroups() library functions must not be called, since
* they have to interact with the library-level thread list and send
* library-internal signals to implement per-process credentials semantics
* required by POSIX but not supported natively on Linux. Another reason to
* avoid this family of functions is that sharing an address space between
* processes running with different privileges is inherently insecure.
* See bpo-35823 for further discussion and references.
*
* In some C libraries, setrlimit() has the same thread list/signalling
* behavior since resource limits were per-thread attributes before
* Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
* (https://www.openwall.com/lists/musl/2020/10/15/6).
*
* If vfork-unsafe functionality is desired after vfork(), consider using
* syscall() to obtain it.
*/
static void
_Py_NO_INLINE static void
child_exec(char *const exec_array[],
char *const argv[],
char *const envp[],
Expand All @@ -431,6 +501,7 @@ child_exec(char *const exec_array[],
int call_setgid, gid_t gid,
int call_setgroups, size_t groups_size, const gid_t *groups,
int call_setuid, uid_t uid, int child_umask,
const void *child_sigmask,
PyObject *py_fds_to_keep,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
Expand Down Expand Up @@ -506,6 +577,13 @@ child_exec(char *const exec_array[],
if (restore_signals)
_Py_RestoreSignals();

#ifdef VFORK_USABLE
if (child_sigmask) {
reset_signal_handlers(child_sigmask);
POSIX_CALL(pthread_sigmask(SIG_SETMASK, child_sigmask, NULL));
}
#endif

#ifdef HAVE_SETSID
if (call_setsid)
POSIX_CALL(setsid());
Expand Down Expand Up @@ -598,6 +676,81 @@ child_exec(char *const exec_array[],
}


/* The main purpose of this wrapper function is to isolate vfork() from both
* subprocess_fork_exec() and child_exec(). A child process created via
* vfork() executes on the same stack as the parent process while the latter is
* suspended, so this function should not be inlined to avoid compiler bugs
* that might clobber data needed by the parent later. Additionally,
* child_exec() should not be inlined to avoid spurious -Wclobber warnings from
* GCC (see bpo-35823).
*/
_Py_NO_INLINE static pid_t
do_fork_exec(char *const exec_array[],
char *const argv[],
char *const envp[],
const char *cwd,
int p2cread, int p2cwrite,
int c2pread, int c2pwrite,
int errread, int errwrite,
int errpipe_read, int errpipe_write,
int close_fds, int restore_signals,
int call_setsid,
int call_setgid, gid_t gid,
int call_setgroups, size_t groups_size, const gid_t *groups,
int call_setuid, uid_t uid, int child_umask,
const void *child_sigmask,
PyObject *py_fds_to_keep,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
{

pid_t pid;

#ifdef VFORK_USABLE
if (child_sigmask) {
/* These are checked by our caller; verify them in debug builds. */
assert(!call_setsid);
assert(!call_setuid);
assert(!call_setgid);
assert(!call_setgroups);
assert(preexec_fn == Py_None);

pid = vfork();
} else
#endif
{
pid = fork();
}

if (pid != 0) {
return pid;
}

/* Child process.
* See the comment above child_exec() for restrictions imposed on
* the code below.
*/

if (preexec_fn != Py_None) {
/* We'll be calling back into Python later so we need to do this.
* This call may not be async-signal-safe but neither is calling
* back into Python. The user asked us to use hope as a strategy
* to avoid deadlock... */
PyOS_AfterFork_Child();
}

child_exec(exec_array, argv, envp, cwd,
p2cread, p2cwrite, c2pread, c2pwrite,
errread, errwrite, errpipe_read, errpipe_write,
close_fds, restore_signals, call_setsid,
call_setgid, gid, call_setgroups, groups_size, groups,
call_setuid, uid, child_umask, child_sigmask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
_exit(255);
return 0; /* Dead code to avoid a potential compiler warning. */
}


static PyObject *
subprocess_fork_exec(PyObject* self, PyObject *args)
{
Expand Down Expand Up @@ -835,39 +988,56 @@ subprocess_fork_exec(PyObject* self, PyObject *args)
need_after_fork = 1;
}

pid = fork();
if (pid == 0) {
/* Child process */
/*
* Code from here to _exit() must only use async-signal-safe functions,
* listed at `man 7 signal` or
* http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
/* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
const void *old_sigmask = NULL;
#ifdef VFORK_USABLE
/* Use vfork() only if it's safe. See the comment above child_exec(). */
sigset_t old_sigs;
if (preexec_fn == Py_None &&
!call_setuid && !call_setgid && !call_setgroups && !call_setsid) {
/* Block all signals to ensure that no signal handlers are run in the
* child process while it shares memory with us. Note that signals
* used internally by C libraries won't be blocked by
* pthread_sigmask(), but signal handlers installed by C libraries
* normally service only signals originating from *within the process*,
* so it should be sufficient to consider any library function that
* might send such a signal to be vfork-unsafe and do not call it in
* the child.
*/
sigset_t all_sigs;
sigfillset(&all_sigs);
pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs);
old_sigmask = &old_sigs;
}
#endif

if (preexec_fn != Py_None) {
/* We'll be calling back into Python later so we need to do this.
* This call may not be async-signal-safe but neither is calling
* back into Python. The user asked us to use hope as a strategy
* to avoid deadlock... */
PyOS_AfterFork_Child();
}
pid = do_fork_exec(exec_array, argv, envp, cwd,
p2cread, p2cwrite, c2pread, c2pwrite,
errread, errwrite, errpipe_read, errpipe_write,
close_fds, restore_signals, call_setsid,
call_setgid, gid, call_setgroups, num_groups, groups,
call_setuid, uid, child_umask, old_sigmask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);

child_exec(exec_array, argv, envp, cwd,
p2cread, p2cwrite, c2pread, c2pwrite,
errread, errwrite, errpipe_read, errpipe_write,
close_fds, restore_signals, call_setsid,
call_setgid, gid, call_setgroups, num_groups, groups,
call_setuid, uid, child_umask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
_exit(255);
return NULL; /* Dead code to avoid a potential compiler warning. */
}
/* Parent (original) process */
if (pid == -1) {
/* Capture errno for the exception. */
saved_errno = errno;
}

#ifdef VFORK_USABLE
if (old_sigmask) {
/* vfork() semantics guarantees that the parent is blocked
* until the child performs _exit() or execve(), so it is safe
* to unblock signals once we're here.
* Note that in environments where vfork() is implemented as fork(),
* such as QEMU user-mode emulation, the parent won't be blocked,
* but it won't share the address space with the child,
* so it's still safe to unblock the signals. */
pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
}
#endif

Py_XDECREF(cwd_obj2);

if (need_after_fork)
Expand Down
2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -11694,7 +11694,7 @@ for ac_func in alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \
sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
truncate uname unlinkat utimensat utimes waitid waitpid wait3 wait4 \
truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \
wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -3686,7 +3686,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \
sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
truncate uname unlinkat utimensat utimes waitid waitpid wait3 wait4 \
truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \
wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn)

# Force lchmod off for Linux. Linux disallows changing the mode of symbolic
Expand Down
3 changes: 3 additions & 0 deletions pyconfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,9 @@
/* Define to 1 if you have the <uuid/uuid.h> header file. */
#undef HAVE_UUID_UUID_H

/* Define to 1 if you have the `vfork' function. */
#undef HAVE_VFORK

/* Define to 1 if you have the `wait3' function. */
#undef HAVE_WAIT3

Expand Down