@@ -10,9 +10,6 @@ use core::ffi::NonZero_c_int;
10
10
#[ cfg( target_os = "linux" ) ]
11
11
use crate :: os:: linux:: process:: PidFd ;
12
12
13
- #[ cfg( target_os = "linux" ) ]
14
- use crate :: sys:: weak:: raw_syscall;
15
-
16
13
#[ cfg( any(
17
14
target_os = "macos" ,
18
15
target_os = "watchos" ,
@@ -91,6 +88,11 @@ impl Command {
91
88
if let Some ( ret) = self . posix_spawn ( & theirs, envp. as_ref ( ) ) ? {
92
89
return Ok ( ( ret, ours) ) ;
93
90
}
91
+
92
+ #[ cfg( target_os = "linux" ) ]
93
+ let ( input, output) = sys:: net:: Socket :: new_pair ( libc:: AF_UNIX , libc:: SOCK_SEQPACKET ) ?;
94
+
95
+ #[ cfg( not( target_os = "linux" ) ) ]
94
96
let ( input, output) = sys:: pipe:: anon_pipe ( ) ?;
95
97
96
98
// Whatever happens after the fork is almost for sure going to touch or
@@ -104,12 +106,16 @@ impl Command {
104
106
// The child calls `mem::forget` to leak the lock, which is crucial because
105
107
// releasing a lock is not async-signal-safe.
106
108
let env_lock = sys:: os:: env_read_lock ( ) ;
107
- let ( pid, pidfd ) = unsafe { self . do_fork ( ) ? } ;
109
+ let pid = unsafe { self . do_fork ( ) ? } ;
108
110
109
111
if pid == 0 {
110
112
crate :: panic:: always_abort ( ) ;
111
113
mem:: forget ( env_lock) ; // avoid non-async-signal-safe unlocking
112
114
drop ( input) ;
115
+ #[ cfg( target_os = "linux" ) ]
116
+ if self . get_create_pidfd ( ) {
117
+ self . send_pidfd ( & output) ;
118
+ }
113
119
let Err ( err) = unsafe { self . do_exec ( theirs, envp. as_ref ( ) ) } ;
114
120
let errno = err. raw_os_error ( ) . unwrap_or ( libc:: EINVAL ) as u32 ;
115
121
let errno = errno. to_be_bytes ( ) ;
@@ -133,6 +139,12 @@ impl Command {
133
139
drop ( env_lock) ;
134
140
drop ( output) ;
135
141
142
+ #[ cfg( target_os = "linux" ) ]
143
+ let pidfd = if self . get_create_pidfd ( ) { self . recv_pidfd ( & input) } else { -1 } ;
144
+
145
+ #[ cfg( not( target_os = "linux" ) ) ]
146
+ let pidfd = -1 ;
147
+
136
148
// Safety: On Linux the pidfd, if any, was received from the child via
137
149
// `recv_pidfd` (otherwise it is -1), so it's valid and otherwise unowned.
138
150
let mut p = unsafe { Process :: new ( pid, pidfd) } ;
@@ -160,6 +172,7 @@ impl Command {
160
172
}
161
173
Ok ( ..) => {
162
174
// pipe I/O up to PIPE_BUF bytes should be atomic
175
+ // similarly SOCK_SEQPACKET messages should arrive whole
163
176
assert ! ( p. wait( ) . is_ok( ) , "wait() should either return Ok or panic" ) ;
164
177
panic ! ( "short read on the CLOEXEC pipe" )
165
178
}
@@ -185,28 +198,27 @@ impl Command {
185
198
) ;
186
199
187
200
// tvOS/watchOS variant of `do_fork`: never forks, always returns
// `Err(Self::ERR_APPLE_TV_WATCH_NO_FORK_EXEC)`.
#[ cfg( any( target_os = "tvos" , target_os = "watchos" ) ) ]
188
- unsafe fn do_fork ( & mut self ) -> Result < ( pid_t , pid_t ) , io:: Error > {
201
+ unsafe fn do_fork ( & mut self ) -> Result < pid_t , io:: Error > {
189
202
return Err ( Self :: ERR_APPLE_TV_WATCH_NO_FORK_EXEC ) ;
190
203
}
191
204
192
205
// Attempts to fork the process via `libc::fork`. If successful, returns Ok(0)
193
206
// in the child, and Ok(child_pid) in the parent.
194
207
#[ cfg( not( any(
195
- target_os = "linux" ,
196
208
target_os = "watchos" ,
197
209
target_os = "tvos" ,
198
210
all( target_os = "nto" , target_env = "nto71" ) ,
199
211
) ) ) ]
200
- unsafe fn do_fork ( & mut self ) -> Result < ( pid_t , pid_t ) , io:: Error > {
201
- cvt ( libc:: fork ( ) ) . map ( |res| ( res , - 1 ) )
212
+ unsafe fn do_fork ( & mut self ) -> Result < pid_t , io:: Error > {
213
+ cvt ( libc:: fork ( ) )
202
214
}
203
215
204
216
// On QNX Neutrino, fork can fail with EBADF in case "another thread might have opened
205
217
// or closed a file descriptor while the fork() was occurring".
206
218
// Documentation says "... or try calling fork() again". This is what we do here.
207
219
// See also https://www.qnx.com/developers/docs/7.1/#com.qnx.doc.neutrino.lib_ref/topic/f/fork.html
208
220
#[ cfg( all( target_os = "nto" , target_env = "nto71" ) ) ]
209
- unsafe fn do_fork ( & mut self ) -> Result < ( pid_t , pid_t ) , io:: Error > {
221
+ unsafe fn do_fork ( & mut self ) -> Result < pid_t , io:: Error > {
210
222
use crate :: sys:: os:: errno;
211
223
212
224
let mut delay = MIN_FORKSPAWN_SLEEP ;
@@ -229,91 +241,11 @@ impl Command {
229
241
delay *= 2 ;
230
242
continue ;
231
243
} else {
232
- return cvt ( r) . map ( |res| ( res , - 1 ) ) ;
244
+ return cvt ( r) ;
233
245
}
234
246
}
235
247
}
236
248
237
- // Attempts to fork the process. If successful, returns Ok((0, -1))
238
- // in the child, and Ok((child_pid, child_pidfd)) in the parent.
239
- #[ cfg( target_os = "linux" ) ]
240
- unsafe fn do_fork ( & mut self ) -> Result < ( pid_t , pid_t ) , io:: Error > {
241
- use crate :: sync:: atomic:: { AtomicBool , Ordering } ;
242
-
243
- static HAS_CLONE3 : AtomicBool = AtomicBool :: new ( true ) ;
244
- const CLONE_PIDFD : u64 = 0x00001000 ;
245
-
246
- #[ repr( C ) ]
247
- struct clone_args {
248
- flags : u64 ,
249
- pidfd : u64 ,
250
- child_tid : u64 ,
251
- parent_tid : u64 ,
252
- exit_signal : u64 ,
253
- stack : u64 ,
254
- stack_size : u64 ,
255
- tls : u64 ,
256
- set_tid : u64 ,
257
- set_tid_size : u64 ,
258
- cgroup : u64 ,
259
- }
260
-
261
- raw_syscall ! {
262
- fn clone3( cl_args: * mut clone_args, len: libc:: size_t) -> libc:: c_long
263
- }
264
-
265
- // Bypassing libc for `clone3` can make further libc calls unsafe,
266
- // so we use it sparingly for now. See #89522 for details.
267
- // Some tools (e.g. sandboxing tools) may also expect `fork`
268
- // rather than `clone3`.
269
- let want_clone3_pidfd = self . get_create_pidfd ( ) ;
270
-
271
- // If we fail to create a pidfd for any reason, this will
272
- // stay as -1, which indicates an error.
273
- let mut pidfd: pid_t = -1 ;
274
-
275
- // Attempt to use the `clone3` syscall, which supports more arguments
276
- // (in particular, the ability to create a pidfd). If this fails,
277
- // we will fall through this block to a call to `fork()`
278
- if want_clone3_pidfd && HAS_CLONE3 . load ( Ordering :: Relaxed ) {
279
- let mut args = clone_args {
280
- flags : CLONE_PIDFD ,
281
- pidfd : & mut pidfd as * mut pid_t as u64 ,
282
- child_tid : 0 ,
283
- parent_tid : 0 ,
284
- exit_signal : libc:: SIGCHLD as u64 ,
285
- stack : 0 ,
286
- stack_size : 0 ,
287
- tls : 0 ,
288
- set_tid : 0 ,
289
- set_tid_size : 0 ,
290
- cgroup : 0 ,
291
- } ;
292
-
293
- let args_ptr = & mut args as * mut clone_args ;
294
- let args_size = crate :: mem:: size_of :: < clone_args > ( ) ;
295
-
296
- let res = cvt ( clone3 ( args_ptr, args_size) ) ;
297
- match res {
298
- Ok ( n) => return Ok ( ( n as pid_t , pidfd) ) ,
299
- Err ( e) => match e. raw_os_error ( ) {
300
- // Multiple threads can race to execute this store,
301
- // but that's fine - that just means that multiple threads
302
- // will have tried and failed to execute the same syscall,
303
- // with no other side effects.
304
- Some ( libc:: ENOSYS ) => HAS_CLONE3 . store ( false , Ordering :: Relaxed ) ,
305
- // Fallback to fork if `EPERM` is returned. (e.g. blocked by seccomp)
306
- Some ( libc:: EPERM ) => { }
307
- _ => return Err ( e) ,
308
- } ,
309
- }
310
- }
311
-
312
- // Generally, we just call `fork`. If we get here after wanting `clone3`,
313
- // then the syscall does not exist or we do not have permission to call it.
314
- cvt ( libc:: fork ( ) ) . map ( |res| ( res, pidfd) )
315
- }
316
-
317
249
pub fn exec ( & mut self , default : Stdio ) -> io:: Error {
318
250
let envp = self . capture_env ( ) ;
319
251
@@ -722,6 +654,115 @@ impl Command {
722
654
Ok ( Some ( p) )
723
655
}
724
656
}
657
+
658
+ #[ cfg( target_os = "linux" ) ]
659
+ fn send_pidfd ( & self , sock : & crate :: sys:: net:: Socket ) {
660
+ use crate :: io:: IoSlice ;
661
+ use crate :: os:: fd:: RawFd ;
662
+ use crate :: sys:: cvt_r;
663
+ use libc:: { CMSG_DATA , CMSG_FIRSTHDR , CMSG_LEN , CMSG_SPACE , SCM_RIGHTS , SOL_SOCKET } ;
664
+
665
+ unsafe {
666
+ let child_pid = libc:: getpid ( ) ;
667
+ // pidfd_open sets CLOEXEC by default
668
+ let pidfd = libc:: syscall ( libc:: SYS_pidfd_open , child_pid, 0 ) ;
669
+
670
+ let fds: [ c_int ; 1 ] = [ pidfd as RawFd ] ;
671
+
672
+ const SCM_MSG_LEN : usize = mem:: size_of :: < [ c_int ; 1 ] > ( ) ;
673
+
674
+ #[ repr( C ) ]
675
+ union Cmsg {
676
+ buf : [ u8 ; unsafe { CMSG_SPACE ( SCM_MSG_LEN as u32 ) as usize } ] ,
677
+ _align : libc:: cmsghdr ,
678
+ }
679
+
680
+ let mut cmsg: Cmsg = mem:: zeroed ( ) ;
681
+
682
+ // 0-length message to send through the socket so we can pass along the fd
683
+ let mut iov = [ IoSlice :: new ( b"" ) ] ;
684
+ let mut msg: libc:: msghdr = mem:: zeroed ( ) ;
685
+
686
+ msg. msg_iov = & mut iov as * mut _ as * mut _ ;
687
+ msg. msg_iovlen = 1 ;
688
+ msg. msg_controllen = mem:: size_of_val ( & cmsg. buf ) as _ ;
689
+ msg. msg_control = & mut cmsg. buf as * mut _ as * mut _ ;
690
+
691
+ // only attach cmsg if we successfully acquired the pidfd
692
+ if pidfd >= 0 {
693
+ let hdr = CMSG_FIRSTHDR ( & mut msg as * mut _ as * mut _ ) ;
694
+ ( * hdr) . cmsg_level = SOL_SOCKET ;
695
+ ( * hdr) . cmsg_type = SCM_RIGHTS ;
696
+ ( * hdr) . cmsg_len = CMSG_LEN ( SCM_MSG_LEN as _ ) as _ ;
697
+ let data = CMSG_DATA ( hdr) ;
698
+ crate :: ptr:: copy_nonoverlapping (
699
+ fds. as_ptr ( ) . cast :: < u8 > ( ) ,
700
+ data as * mut _ ,
701
+ SCM_MSG_LEN ,
702
+ ) ;
703
+ }
704
+
705
+ // we send the 0-length message even if we failed to acquire the pidfd
706
+ // so we get a consistent SEQPACKET order
707
+ match cvt_r ( || libc:: sendmsg ( sock. as_raw ( ) , & msg, 0 ) ) {
708
+ Ok ( 0 ) => { }
709
+ _ => rtabort ! ( "failed to communicate with parent process" ) ,
710
+ }
711
+ }
712
+ }
713
+
714
+ #[ cfg( target_os = "linux" ) ]
715
+ fn recv_pidfd ( & self , sock : & crate :: sys:: net:: Socket ) -> pid_t {
716
+ use crate :: io:: IoSliceMut ;
717
+ use crate :: sys:: cvt_r;
718
+
719
+ use libc:: { CMSG_DATA , CMSG_FIRSTHDR , CMSG_LEN , CMSG_SPACE , SCM_RIGHTS , SOL_SOCKET } ;
720
+
721
+ unsafe {
722
+ const SCM_MSG_LEN : usize = mem:: size_of :: < [ c_int ; 1 ] > ( ) ;
723
+
724
+ #[ repr( C ) ]
725
+ union Cmsg {
726
+ _buf : [ u8 ; unsafe { CMSG_SPACE ( SCM_MSG_LEN as u32 ) as usize } ] ,
727
+ _align : libc:: cmsghdr ,
728
+ }
729
+ let mut cmsg: Cmsg = mem:: zeroed ( ) ;
730
+ // 0-length read to get the fd
731
+ let mut iov = [ IoSliceMut :: new ( & mut [ ] ) ] ;
732
+
733
+ let mut msg: libc:: msghdr = mem:: zeroed ( ) ;
734
+
735
+ msg. msg_iov = & mut iov as * mut _ as * mut _ ;
736
+ msg. msg_iovlen = 1 ;
737
+ msg. msg_controllen = mem:: size_of :: < Cmsg > ( ) as _ ;
738
+ msg. msg_control = & mut cmsg as * mut _ as * mut _ ;
739
+
740
+ match cvt_r ( || libc:: recvmsg ( sock. as_raw ( ) , & mut msg, 0 ) ) {
741
+ Err ( _) => return -1 ,
742
+ Ok ( _) => { }
743
+ }
744
+
745
+ let hdr = CMSG_FIRSTHDR ( & mut msg as * mut _ as * mut _ ) ;
746
+ if hdr. is_null ( )
747
+ || ( * hdr) . cmsg_level != SOL_SOCKET
748
+ || ( * hdr) . cmsg_type != SCM_RIGHTS
749
+ || ( * hdr) . cmsg_len != CMSG_LEN ( SCM_MSG_LEN as _ ) as _
750
+ {
751
+ return -1 ;
752
+ }
753
+ let data = CMSG_DATA ( hdr) ;
754
+
755
+ let mut fds = [ -1 as c_int ] ;
756
+
757
+ crate :: ptr:: copy_nonoverlapping (
758
+ data as * const _ ,
759
+ fds. as_mut_ptr ( ) . cast :: < u8 > ( ) ,
760
+ SCM_MSG_LEN ,
761
+ ) ;
762
+
763
+ fds[ 0 ]
764
+ }
765
+ }
725
766
}
726
767
727
768
////////////////////////////////////////////////////////////////////////////////
0 commit comments