@@ -3,15 +3,20 @@ use crate::fmt;
33use crate :: io:: { self , Error , ErrorKind } ;
44use crate :: mem;
55use crate :: ptr;
6+ use crate :: sync:: atomic:: { AtomicBool , Ordering } ;
67use crate :: sys;
78use crate :: sys:: cvt;
89use crate :: sys:: process:: process_common:: * ;
10+ use crate :: sys_common:: FromInner ;
11+
12+ #[ cfg( target_os = "linux" ) ]
13+ use crate :: os:: linux:: process:: PidFd ;
914
1015#[ cfg( target_os = "vxworks" ) ]
1116use libc:: RTP_ID as pid_t;
1217
1318#[ cfg( not( target_os = "vxworks" ) ) ]
14- use libc:: { c_int, gid_t, pid_t, uid_t} ;
19+ use libc:: { c_int, c_long , gid_t, pid_t, uid_t} ;
1520
1621////////////////////////////////////////////////////////////////////////////////
1722// Command
@@ -48,7 +53,8 @@ impl Command {
4853 // a lock any more because the parent won't do anything and the child is
4954 // in its own process. Thus the parent drops the lock guard while the child
5055 // forgets it to avoid unlocking it on a new thread, which would be invalid.
51- let ( env_lock, result) = unsafe { ( sys:: os:: env_lock ( ) , cvt ( libc:: fork ( ) ) ?) } ;
56+ let env_lock = unsafe { sys:: os:: env_lock ( ) } ;
57+ let ( result, pidfd) = self . do_fork ( ) ?;
5258
5359 let pid = unsafe {
5460 match result {
@@ -81,7 +87,7 @@ impl Command {
8187 }
8288 } ;
8389
84- let mut p = Process { pid, status : None } ;
90+ let mut p = Process :: new ( pid, pidfd ) ;
8591 drop ( output) ;
8692 let mut bytes = [ 0 ; 8 ] ;
8793
@@ -114,6 +120,87 @@ impl Command {
114120 }
115121 }
116122
123+ // Attempts to fork the process. If successful, returns
124+ // Ok((0, -1)) in the child, and Ok((child_pid, child_pidfd)) in the parent.
125+ fn do_fork ( & mut self ) -> Result < ( pid_t , pid_t ) , io:: Error > {
126+ // If we fail to create a pidfd for any reason, this will
127+ // stay as -1, which indicates an error
128+ let mut pidfd: pid_t = -1 ;
129+
130+ // On Linux, attempt to use the `clone3` syscall, which
131+ // supports more arguments (in particular, the ability to create a pidfd).
132+ // If this fails, we will fall through this block to a call to `fork()`
133+ #[ cfg( target_os = "linux" ) ]
134+ {
135+ static HAS_CLONE3 : AtomicBool = AtomicBool :: new ( true ) ;
136+
137+ const CLONE_PIDFD : u64 = 0x00001000 ;
138+
139+ #[ repr( C ) ]
140+ struct clone_args {
141+ flags : u64 ,
142+ pidfd : u64 ,
143+ child_tid : u64 ,
144+ parent_tid : u64 ,
145+ exit_signal : u64 ,
146+ stack : u64 ,
147+ stack_size : u64 ,
148+ tls : u64 ,
149+ set_tid : u64 ,
150+ set_tid_size : u64 ,
151+ cgroup : u64 ,
152+ }
153+
154+ syscall ! {
155+ fn clone3( cl_args: * mut clone_args, len: libc:: size_t) -> c_long
156+ }
157+
158+ if HAS_CLONE3 . load ( Ordering :: Relaxed ) {
159+ let mut flags = 0 ;
160+ if self . create_pidfd {
161+ flags |= CLONE_PIDFD ;
162+ }
163+
164+ let mut args = clone_args {
165+ flags,
166+ pidfd : & mut pidfd as * mut pid_t as u64 ,
167+ child_tid : 0 ,
168+ parent_tid : 0 ,
169+ exit_signal : libc:: SIGCHLD as u64 ,
170+ stack : 0 ,
171+ stack_size : 0 ,
172+ tls : 0 ,
173+ set_tid : 0 ,
174+ set_tid_size : 0 ,
175+ cgroup : 0 ,
176+ } ;
177+
178+ let args_ptr = & mut args as * mut clone_args ;
179+ let args_size = crate :: mem:: size_of :: < clone_args > ( ) ;
180+
181+ let res = cvt ( unsafe { clone3 ( args_ptr, args_size) } ) ;
182+ match res {
183+ Ok ( n) => return Ok ( ( n as pid_t , pidfd) ) ,
184+ Err ( e) => match e. raw_os_error ( ) {
185+ // Multiple threads can race to execute this store,
186+ // but that's fine - that just means that multiple threads
187+ // will have tried and failed to execute the same syscall,
188+ // with no other side effects.
189+ Some ( libc:: ENOSYS ) => HAS_CLONE3 . store ( false , Ordering :: Relaxed ) ,
190+ // Fallback to fork if `EPERM` is returned. (e.g. blocked by seccomp)
191+ Some ( libc:: EPERM ) => { }
192+ _ => return Err ( e) ,
193+ } ,
194+ }
195+ }
196+ }
197+
198+ // If we get here, we are either not on Linux,
199+ // or we are on Linux and the 'clone3' syscall does not exist
200+ // or we do not have permission to call it
201+ cvt ( unsafe { libc:: fork ( ) } ) . map ( |res| ( res, pidfd) )
202+ }
203+
117204 pub fn exec ( & mut self , default : Stdio ) -> io:: Error {
118205 let envp = self . capture_env ( ) ;
119206
@@ -297,6 +384,7 @@ impl Command {
297384 || ( self . env_saw_path ( ) && !self . program_is_path ( ) )
298385 || !self . get_closures ( ) . is_empty ( )
299386 || self . get_groups ( ) . is_some ( )
387+ || self . create_pidfd
300388 {
301389 return Ok ( None ) ;
302390 }
@@ -341,7 +429,7 @@ impl Command {
341429 None => None ,
342430 } ;
343431
344- let mut p = Process { pid : 0 , status : None } ;
432+ let mut p = Process :: new ( 0 , - 1 ) ;
345433
346434 struct PosixSpawnFileActions < ' a > ( & ' a mut MaybeUninit < libc:: posix_spawn_file_actions_t > ) ;
347435
@@ -430,9 +518,26 @@ impl Command {
430518pub struct Process {
431519 pid : pid_t ,
432520 status : Option < ExitStatus > ,
521+ // On Linux, stores the pidfd created for this child.
522+ // This is None if the user did not request pidfd creation,
523+ // or if the pidfd could not be created for some reason
524+ // (e.g. the `clone3` syscall was not available).
525+ #[ cfg( target_os = "linux" ) ]
526+ pidfd : Option < PidFd > ,
433527}
434528
435529impl Process {
530+ #[ cfg( target_os = "linux" ) ]
531+ fn new ( pid : pid_t , pidfd : pid_t ) -> Self {
532+ let pidfd = ( pidfd >= 0 ) . then ( || PidFd :: from_inner ( sys:: fd:: FileDesc :: new ( pidfd) ) ) ;
533+ Process { pid, status : None , pidfd }
534+ }
535+
536+ #[ cfg( not( target_os = "linux" ) ) ]
537+ fn new ( pid : pid_t , _pidfd : pid_t ) -> Self {
538+ Process { pid, status : None }
539+ }
540+
436541 pub fn id ( & self ) -> u32 {
437542 self . pid as u32
438543 }
@@ -546,6 +651,24 @@ impl fmt::Display for ExitStatus {
546651 }
547652}
548653
654+ #[ cfg( target_os = "linux" ) ]
655+ #[ unstable( feature = "linux_pidfd" , issue = "82971" ) ]
656+ impl crate :: os:: linux:: process:: ChildExt for crate :: process:: Child {
657+ fn pidfd ( & self ) -> io:: Result < & PidFd > {
658+ self . handle
659+ . pidfd
660+ . as_ref ( )
661+ . ok_or_else ( || Error :: new ( ErrorKind :: Other , "No pidfd was created." ) )
662+ }
663+
664+ fn take_pidfd ( & mut self ) -> io:: Result < PidFd > {
665+ self . handle
666+ . pidfd
667+ . take ( )
668+ . ok_or_else ( || Error :: new ( ErrorKind :: Other , "No pidfd was created." ) )
669+ }
670+ }
671+
549672#[ cfg( test) ]
550673#[ path = "process_unix/tests.rs" ]
551674mod tests;
0 commit comments