@@ -59,7 +59,7 @@ use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
5959use crate :: os:: unix:: net:: UnixStream ;
6060use crate :: process:: { ChildStderr , ChildStdin , ChildStdout } ;
6161use crate :: ptr;
62- use crate :: sync:: atomic:: { AtomicBool , Ordering } ;
62+ use crate :: sync:: atomic:: { AtomicBool , AtomicU8 , Ordering } ;
6363use crate :: sys:: cvt;
6464
6565#[ cfg( test) ]
@@ -491,18 +491,29 @@ impl CopyResult {
491491 }
492492}
493493
494- /// linux-specific implementation that will attempt to use copy_file_range for copy offloading
495- /// as the name says, it only works on regular files
494+ /// Invalid file descriptor.
495+ ///
496+ /// Valid file descriptors are guaranteed to be non-negative numbers (see `open()` manpage)
497+ /// while negative values are used to indicate errors.
498+ /// Thus -1 will never overlap with a valid open file.
499+ const INVALID_FD : RawFd = -1 ;
500+
501+ /// Linux-specific implementation that will attempt to use copy_file_range for copy offloading.
502+ /// As the name says, it only works on regular files.
496503///
497504/// Callers must handle fallback to a generic copy loop.
498505/// `Fallback` may indicate non-zero number of bytes already written
499506/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
500507pub ( super ) fn copy_regular_files ( reader : RawFd , writer : RawFd , max_len : u64 ) -> CopyResult {
501508 use crate :: cmp;
502509
510+ const NOT_PROBED : u8 = 0 ;
511+ const UNAVAILABLE : u8 = 1 ;
512+ const AVAILABLE : u8 = 2 ;
513+
503514 // Kernels prior to 4.5 don't have copy_file_range
504515 // We store the availability in a global to avoid unnecessary syscalls
505- static HAS_COPY_FILE_RANGE : AtomicBool = AtomicBool :: new ( true ) ;
516+ static HAS_COPY_FILE_RANGE : AtomicU8 = AtomicU8 :: new ( NOT_PROBED ) ;
506517
507518 syscall ! {
508519 fn copy_file_range(
@@ -515,39 +526,39 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
515526 ) -> libc:: ssize_t
516527 }
517528
518- let has_copy_file_range = HAS_COPY_FILE_RANGE . load ( Ordering :: Relaxed ) ;
519- let mut written = 0u64 ;
520- while written < max_len {
521- let copy_result = if has_copy_file_range {
522- let bytes_to_copy = cmp:: min ( max_len - written, usize:: MAX as u64 ) ;
523- // cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
524- // this allows us to copy large chunks without hitting EOVERFLOW,
525- // unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
526- let bytes_to_copy = cmp:: min ( bytes_to_copy as usize , 0x4000_0000usize ) ;
527- let copy_result = unsafe {
528- // We actually don't have to adjust the offsets,
529- // because copy_file_range adjusts the file offset automatically
530- cvt ( copy_file_range (
531- reader,
532- ptr:: null_mut ( ) ,
533- writer,
534- ptr:: null_mut ( ) ,
535- bytes_to_copy,
536- 0 ,
537- ) )
529+ match HAS_COPY_FILE_RANGE . load ( Ordering :: Relaxed ) {
530+ NOT_PROBED => {
531+ // EPERM can indicate seccomp filters or an immutable file.
532+ // To distinguish these cases we probe with invalid file descriptors which should result in EBADF if the syscall is supported
533+ // and some other error (ENOSYS or EPERM) if it's not available
534+ let result = unsafe {
535+ cvt ( copy_file_range ( INVALID_FD , ptr:: null_mut ( ) , INVALID_FD , ptr:: null_mut ( ) , 1 , 0 ) )
538536 } ;
539- if let Err ( ref copy_err) = copy_result {
540- match copy_err. raw_os_error ( ) {
541- Some ( libc:: ENOSYS | libc:: EPERM | libc:: EOPNOTSUPP ) => {
542- HAS_COPY_FILE_RANGE . store ( false , Ordering :: Relaxed ) ;
543- }
544- _ => { }
545- }
537+
538+ if matches ! ( result. map_err( |e| e. raw_os_error( ) ) , Err ( Some ( libc:: EBADF ) ) ) {
539+ HAS_COPY_FILE_RANGE . store ( AVAILABLE , Ordering :: Relaxed ) ;
540+ } else {
541+ HAS_COPY_FILE_RANGE . store ( UNAVAILABLE , Ordering :: Relaxed ) ;
542+ return CopyResult :: Fallback ( 0 ) ;
546543 }
547- copy_result
548- } else {
549- Err ( Error :: from_raw_os_error ( libc:: ENOSYS ) )
544+ }
545+ UNAVAILABLE => return CopyResult :: Fallback ( 0 ) ,
546+ _ => { }
547+ } ;
548+
549+ let mut written = 0u64 ;
550+ while written < max_len {
551+ let bytes_to_copy = cmp:: min ( max_len - written, usize:: MAX as u64 ) ;
552+ // cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
553+ // this allows us to copy large chunks without hitting EOVERFLOW,
554+ // unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
555+ let bytes_to_copy = cmp:: min ( bytes_to_copy as usize , 0x4000_0000usize ) ;
556+ let copy_result = unsafe {
557+ // We actually don't have to adjust the offsets,
558+ // because copy_file_range adjusts the file offset automatically
559+ cvt ( copy_file_range ( reader, ptr:: null_mut ( ) , writer, ptr:: null_mut ( ) , bytes_to_copy, 0 ) )
550560 } ;
561+
551562 match copy_result {
552563 Ok ( 0 ) if written == 0 => {
553564 // fallback to work around several kernel bugs where copy_file_range will fail to
@@ -567,11 +578,14 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
567578 libc:: ENOSYS | libc:: EXDEV | libc:: EINVAL | libc:: EPERM | libc:: EOPNOTSUPP ,
568579 ) => {
569580 // Try fallback io::copy if either:
570- // - Kernel version is < 4.5 (ENOSYS)
581+ // - Kernel version is < 4.5 (ENOSYS¹ )
571582 // - Files are mounted on different fs (EXDEV)
572583 // - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
573- // - copy_file_range is disallowed, for example by seccomp (EPERM)
584+ // - the file is immutable or the copy_file_range syscall is blocked by seccomp¹ (EPERM)
574585 // - copy_file_range cannot be used with pipes or device nodes (EINVAL)
586+ //
587+ // ¹ these cases should be detected by the initial probe but we handle them here
588+ // anyway in case syscall interception changes during runtime
575589 assert_eq ! ( written, 0 ) ;
576590 CopyResult :: Fallback ( 0 )
577591 }
0 commit comments