Skip to content

Commit ca4cdc2

Browse files
authored
Rollup merge of rust-lang#81825 - voidc:pidfd, r=joshtriplett
Add Linux-specific pidfd process extensions (take 2). Continuation of rust-lang#77168. I addressed the following concerns from the original PR: (1) make `CommandExt` and `ChildExt` sealed traits; (2) wrap file descriptors in a `PidFd` struct representing ownership over the fd; (3) add `take_pidfd` to take the fd out of `Child`; (4) close the fd when dropped. Tracking issue: rust-lang#82971
2 parents 9320b12 + f23b30c commit ca4cdc2

File tree

6 files changed

+312
-5
lines changed

6 files changed

+312
-5
lines changed

library/std/src/os/linux/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
#![stable(feature = "raw_ext", since = "1.1.0")]
44

55
pub mod fs;
6+
pub mod process;
67
pub mod raw;

library/std/src/os/linux/process.rs

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
//! Linux-specific extensions to primitives in the `std::process` module.
2+
3+
#![unstable(feature = "linux_pidfd", issue = "82971")]
4+
5+
use crate::io::Result;
6+
use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
7+
use crate::process;
8+
use crate::sys::fd::FileDesc;
9+
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
10+
11+
/// This type represents a file descriptor that refers to a process.
12+
///
13+
/// A `PidFd` can be obtained by setting the corresponding option on [`Command`]
14+
/// with [`create_pidfd`]. Subsequently, the created pidfd can be retrieved
15+
/// from the [`Child`] by calling [`pidfd`] or [`take_pidfd`].
16+
///
17+
/// Example:
18+
/// ```no_run
19+
/// #![feature(linux_pidfd)]
20+
/// use std::os::linux::process::{CommandExt, ChildExt};
21+
/// use std::process::Command;
22+
///
23+
/// let mut child = Command::new("echo")
24+
/// .create_pidfd(true)
25+
/// .spawn()
26+
/// .expect("Failed to spawn child");
27+
///
28+
/// let pidfd = child
29+
/// .take_pidfd()
30+
/// .expect("Failed to retrieve pidfd");
31+
///
32+
/// // The file descriptor will be closed when `pidfd` is dropped.
33+
/// ```
34+
/// Refer to the man page of [`pidfd_open(2)`] for further details.
35+
///
36+
/// [`Command`]: process::Command
37+
/// [`create_pidfd`]: CommandExt::create_pidfd
38+
/// [`Child`]: process::Child
39+
/// [`pidfd`]: fn@ChildExt::pidfd
40+
/// [`take_pidfd`]: ChildExt::take_pidfd
41+
/// [`pidfd_open(2)`]: https://man7.org/linux/man-pages/man2/pidfd_open.2.html
42+
#[derive(Debug)]
43+
pub struct PidFd {
44+
inner: FileDesc,
45+
}
46+
47+
impl AsInner<FileDesc> for PidFd {
48+
fn as_inner(&self) -> &FileDesc {
49+
&self.inner
50+
}
51+
}
52+
53+
// Wraps an existing `FileDesc` in a `PidFd`.
impl FromInner<FileDesc> for PidFd {
    fn from_inner(inner: FileDesc) -> PidFd {
        Self { inner }
    }
}
58+
59+
impl IntoInner<FileDesc> for PidFd {
60+
fn into_inner(self) -> FileDesc {
61+
self.inner
62+
}
63+
}
64+
65+
impl AsRawFd for PidFd {
66+
fn as_raw_fd(&self) -> RawFd {
67+
self.as_inner().raw()
68+
}
69+
}
70+
71+
impl FromRawFd for PidFd {
72+
unsafe fn from_raw_fd(fd: RawFd) -> Self {
73+
Self::from_inner(FileDesc::new(fd))
74+
}
75+
}
76+
77+
impl IntoRawFd for PidFd {
78+
fn into_raw_fd(self) -> RawFd {
79+
self.into_inner().into_raw()
80+
}
81+
}
82+
83+
mod private_child_ext {
84+
pub trait Sealed {}
85+
impl Sealed for crate::process::Child {}
86+
}
87+
88+
/// Os-specific extensions for [`Child`]
89+
///
90+
/// [`Child`]: process::Child
91+
pub trait ChildExt: private_child_ext::Sealed {
92+
/// Obtains a reference to the [`PidFd`] created for this [`Child`], if available.
93+
///
94+
/// A pidfd will only be available if its creation was requested with
95+
/// [`create_pidfd`] when the corresponding [`Command`] was created.
96+
///
97+
/// Even if requested, a pidfd may not be available due to an older
98+
/// version of Linux being in use, or if some other error occurred.
99+
///
100+
/// [`Command`]: process::Command
101+
/// [`create_pidfd`]: CommandExt::create_pidfd
102+
/// [`Child`]: process::Child
103+
fn pidfd(&self) -> Result<&PidFd>;
104+
105+
/// Takes ownership of the [`PidFd`] created for this [`Child`], if available.
106+
///
107+
/// A pidfd will only be available if its creation was requested with
108+
/// [`create_pidfd`] when the corresponding [`Command`] was created.
109+
///
110+
/// Even if requested, a pidfd may not be available due to an older
111+
/// version of Linux being in use, or if some other error occurred.
112+
///
113+
/// [`Command`]: process::Command
114+
/// [`create_pidfd`]: CommandExt::create_pidfd
115+
/// [`Child`]: process::Child
116+
fn take_pidfd(&mut self) -> Result<PidFd>;
117+
}
118+
119+
mod private_command_ext {
120+
pub trait Sealed {}
121+
impl Sealed for crate::process::Command {}
122+
}
123+
124+
/// Os-specific extensions for [`Command`]
125+
///
126+
/// [`Command`]: process::Command
127+
pub trait CommandExt: private_command_ext::Sealed {
128+
/// Sets whether a [`PidFd`](struct@PidFd) should be created for the [`Child`]
129+
/// spawned by this [`Command`].
130+
/// By default, no pidfd will be created.
131+
///
132+
/// The pidfd can be retrieved from the child with [`pidfd`] or [`take_pidfd`].
133+
///
134+
/// A pidfd will only be created if it is possible to do so
135+
/// in a guaranteed race-free manner (e.g. if the `clone3` system call
136+
/// is supported). Otherwise, [`pidfd`] will return an error.
137+
///
138+
/// [`Command`]: process::Command
139+
/// [`Child`]: process::Child
140+
/// [`pidfd`]: fn@ChildExt::pidfd
141+
/// [`take_pidfd`]: ChildExt::take_pidfd
142+
fn create_pidfd(&mut self, val: bool) -> &mut process::Command;
143+
}
144+
145+
// Forwards the setting to the platform-specific command and returns the
// public `Command` so calls can be chained.
impl CommandExt for process::Command {
    fn create_pidfd(&mut self, val: bool) -> &mut process::Command {
        {
            let sys_command = self.as_inner_mut();
            sys_command.create_pidfd(val);
        }
        self
    }
}

library/std/src/process.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
165165
/// [`wait`]: Child::wait
166166
#[stable(feature = "process", since = "1.0.0")]
167167
pub struct Child {
168-
handle: imp::Process,
168+
pub(crate) handle: imp::Process,
169169

170170
/// The handle for writing to the child's standard input (stdin), if it has
171171
/// been captured. To avoid partially moving

library/std/src/sys/unix/process/process_common.rs

+6
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ pub struct Command {
7979
stdin: Option<Stdio>,
8080
stdout: Option<Stdio>,
8181
stderr: Option<Stdio>,
82+
pub(crate) create_pidfd: bool,
8283
}
8384

8485
// Create a new type for argv, so that we can make it `Send` and `Sync`
@@ -141,6 +142,7 @@ impl Command {
141142
stdin: None,
142143
stdout: None,
143144
stderr: None,
145+
create_pidfd: false,
144146
}
145147
}
146148

@@ -177,6 +179,10 @@ impl Command {
177179
self.groups = Some(Box::from(groups));
178180
}
179181

182+
pub fn create_pidfd(&mut self, val: bool) {
183+
self.create_pidfd = val;
184+
}
185+
180186
pub fn saw_nul(&self) -> bool {
181187
self.saw_nul
182188
}

library/std/src/sys/unix/process/process_unix.rs

+127-4
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@ use crate::fmt;
33
use crate::io::{self, Error, ErrorKind};
44
use crate::mem;
55
use crate::ptr;
6+
use crate::sync::atomic::{AtomicBool, Ordering};
67
use crate::sys;
78
use crate::sys::cvt;
89
use crate::sys::process::process_common::*;
10+
use crate::sys_common::FromInner;
11+
12+
#[cfg(target_os = "linux")]
13+
use crate::os::linux::process::PidFd;
914

1015
#[cfg(target_os = "vxworks")]
1116
use libc::RTP_ID as pid_t;
1217

1318
#[cfg(not(target_os = "vxworks"))]
14-
use libc::{c_int, gid_t, pid_t, uid_t};
19+
use libc::{c_int, c_long, gid_t, pid_t, uid_t};
1520

1621
////////////////////////////////////////////////////////////////////////////////
1722
// Command
@@ -48,7 +53,8 @@ impl Command {
4853
// a lock any more because the parent won't do anything and the child is
4954
// in its own process. Thus the parent drops the lock guard while the child
5055
// forgets it to avoid unlocking it on a new thread, which would be invalid.
51-
let (env_lock, result) = unsafe { (sys::os::env_lock(), cvt(libc::fork())?) };
56+
let env_lock = unsafe { sys::os::env_lock() };
57+
let (result, pidfd) = self.do_fork()?;
5258

5359
let pid = unsafe {
5460
match result {
@@ -81,7 +87,7 @@ impl Command {
8187
}
8288
};
8389

84-
let mut p = Process { pid, status: None };
90+
let mut p = Process::new(pid, pidfd);
8591
drop(output);
8692
let mut bytes = [0; 8];
8793

@@ -114,6 +120,87 @@ impl Command {
114120
}
115121
}
116122

123+
// Attempts to fork the process. If successful, returns
124+
// Ok((0, -1)) in the child, and Ok((child_pid, child_pidfd)) in the parent.
125+
fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> {
126+
// If we fail to create a pidfd for any reason, this will
127+
// stay as -1, which indicates an error
128+
let mut pidfd: pid_t = -1;
129+
130+
// On Linux, attempt to use the `clone3` syscall, which
131+
// supports more arguments (in particular, the ability to create a pidfd).
132+
// If this fails, we will fall through this block to a call to `fork()`
133+
#[cfg(target_os = "linux")]
134+
{
135+
static HAS_CLONE3: AtomicBool = AtomicBool::new(true);
136+
137+
const CLONE_PIDFD: u64 = 0x00001000;
138+
139+
#[repr(C)]
140+
struct clone_args {
141+
flags: u64,
142+
pidfd: u64,
143+
child_tid: u64,
144+
parent_tid: u64,
145+
exit_signal: u64,
146+
stack: u64,
147+
stack_size: u64,
148+
tls: u64,
149+
set_tid: u64,
150+
set_tid_size: u64,
151+
cgroup: u64,
152+
}
153+
154+
syscall! {
155+
fn clone3(cl_args: *mut clone_args, len: libc::size_t) -> c_long
156+
}
157+
158+
if HAS_CLONE3.load(Ordering::Relaxed) {
159+
let mut flags = 0;
160+
if self.create_pidfd {
161+
flags |= CLONE_PIDFD;
162+
}
163+
164+
let mut args = clone_args {
165+
flags,
166+
pidfd: &mut pidfd as *mut pid_t as u64,
167+
child_tid: 0,
168+
parent_tid: 0,
169+
exit_signal: libc::SIGCHLD as u64,
170+
stack: 0,
171+
stack_size: 0,
172+
tls: 0,
173+
set_tid: 0,
174+
set_tid_size: 0,
175+
cgroup: 0,
176+
};
177+
178+
let args_ptr = &mut args as *mut clone_args;
179+
let args_size = crate::mem::size_of::<clone_args>();
180+
181+
let res = cvt(unsafe { clone3(args_ptr, args_size) });
182+
match res {
183+
Ok(n) => return Ok((n as pid_t, pidfd)),
184+
Err(e) => match e.raw_os_error() {
185+
// Multiple threads can race to execute this store,
186+
// but that's fine - that just means that multiple threads
187+
// will have tried and failed to execute the same syscall,
188+
// with no other side effects.
189+
Some(libc::ENOSYS) => HAS_CLONE3.store(false, Ordering::Relaxed),
190+
// Fallback to fork if `EPERM` is returned. (e.g. blocked by seccomp)
191+
Some(libc::EPERM) => {}
192+
_ => return Err(e),
193+
},
194+
}
195+
}
196+
}
197+
198+
// If we get here, we are either not on Linux,
199+
// or we are on Linux and the 'clone3' syscall does not exist
200+
// or we do not have permission to call it
201+
cvt(unsafe { libc::fork() }).map(|res| (res, pidfd))
202+
}
203+
117204
pub fn exec(&mut self, default: Stdio) -> io::Error {
118205
let envp = self.capture_env();
119206

@@ -297,6 +384,7 @@ impl Command {
297384
|| (self.env_saw_path() && !self.program_is_path())
298385
|| !self.get_closures().is_empty()
299386
|| self.get_groups().is_some()
387+
|| self.create_pidfd
300388
{
301389
return Ok(None);
302390
}
@@ -341,7 +429,7 @@ impl Command {
341429
None => None,
342430
};
343431

344-
let mut p = Process { pid: 0, status: None };
432+
let mut p = Process::new(0, -1);
345433

346434
struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
347435

@@ -430,9 +518,26 @@ impl Command {
430518
pub struct Process {
431519
pid: pid_t,
432520
status: Option<ExitStatus>,
521+
// On Linux, stores the pidfd created for this child.
522+
// This is None if the user did not request pidfd creation,
523+
// or if the pidfd could not be created for some reason
524+
// (e.g. the `clone3` syscall was not available).
525+
#[cfg(target_os = "linux")]
526+
pidfd: Option<PidFd>,
433527
}
434528

435529
impl Process {
530+
#[cfg(target_os = "linux")]
531+
fn new(pid: pid_t, pidfd: pid_t) -> Self {
532+
let pidfd = (pidfd >= 0).then(|| PidFd::from_inner(sys::fd::FileDesc::new(pidfd)));
533+
Process { pid, status: None, pidfd }
534+
}
535+
536+
#[cfg(not(target_os = "linux"))]
537+
fn new(pid: pid_t, _pidfd: pid_t) -> Self {
538+
Process { pid, status: None }
539+
}
540+
436541
pub fn id(&self) -> u32 {
437542
self.pid as u32
438543
}
@@ -546,6 +651,24 @@ impl fmt::Display for ExitStatus {
546651
}
547652
}
548653

654+
#[cfg(target_os = "linux")]
655+
#[unstable(feature = "linux_pidfd", issue = "82971")]
656+
impl crate::os::linux::process::ChildExt for crate::process::Child {
657+
fn pidfd(&self) -> io::Result<&PidFd> {
658+
self.handle
659+
.pidfd
660+
.as_ref()
661+
.ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created."))
662+
}
663+
664+
fn take_pidfd(&mut self) -> io::Result<PidFd> {
665+
self.handle
666+
.pidfd
667+
.take()
668+
.ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created."))
669+
}
670+
}
671+
549672
#[cfg(test)]
550673
#[path = "process_unix/tests.rs"]
551674
mod tests;

0 commit comments

Comments
 (0)