@@ -49,14 +49,16 @@ use crate::convert::TryInto;
4949use crate :: fs:: { File , Metadata } ;
5050use crate :: io:: copy:: generic_copy;
5151use crate :: io:: {
52- BufRead , BufReader , BufWriter , Read , Result , StderrLock , StdinLock , StdoutLock , Take , Write ,
52+ BufRead , BufReader , BufWriter , Error , Read , Result , StderrLock , StdinLock , StdoutLock , Take ,
53+ Write ,
5354} ;
5455use crate :: mem:: ManuallyDrop ;
5556use crate :: net:: TcpStream ;
5657use crate :: os:: unix:: fs:: FileTypeExt ;
5758use crate :: os:: unix:: io:: { AsRawFd , FromRawFd , RawFd } ;
5859use crate :: process:: { ChildStderr , ChildStdin , ChildStdout } ;
59- use crate :: sys:: fs:: { copy_regular_files, sendfile_splice, CopyResult , SpliceMode } ;
60+ use crate :: ptr;
61+ use crate :: sys:: cvt;
6062
6163#[ cfg( test) ]
6264mod tests;
@@ -423,3 +425,145 @@ fn fd_to_meta<T: AsRawFd>(fd: &T) -> FdMeta {
423425 Err ( _) => FdMeta :: NoneObtained ,
424426 }
425427}
428+
429+ pub ( super ) enum CopyResult {
430+ Ended ( Result < u64 > ) ,
431+ Fallback ( u64 ) ,
432+ }
433+
434+ /// linux-specific implementation that will attempt to use copy_file_range for copy offloading
435+ /// as the name says, it only works on regular files
436+ ///
437+ /// Callers must handle fallback to a generic copy loop.
438+ /// `Fallback` may indicate non-zero number of bytes already written
439+ /// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
440+ /// If the initial file offset was 0 then `Fallback` will only contain `0`.
441+ pub ( super ) fn copy_regular_files ( reader : RawFd , writer : RawFd , max_len : u64 ) -> CopyResult {
442+ use crate :: cmp;
443+ use crate :: sync:: atomic:: { AtomicBool , Ordering } ;
444+
445+ // Kernel prior to 4.5 don't have copy_file_range
446+ // We store the availability in a global to avoid unnecessary syscalls
447+ static HAS_COPY_FILE_RANGE : AtomicBool = AtomicBool :: new ( true ) ;
448+
449+ unsafe fn copy_file_range (
450+ fd_in : libc:: c_int ,
451+ off_in : * mut libc:: loff_t ,
452+ fd_out : libc:: c_int ,
453+ off_out : * mut libc:: loff_t ,
454+ len : libc:: size_t ,
455+ flags : libc:: c_uint ,
456+ ) -> libc:: c_long {
457+ libc:: syscall ( libc:: SYS_copy_file_range , fd_in, off_in, fd_out, off_out, len, flags)
458+ }
459+
460+ let has_copy_file_range = HAS_COPY_FILE_RANGE . load ( Ordering :: Relaxed ) ;
461+ let mut written = 0u64 ;
462+ while written < max_len {
463+ let copy_result = if has_copy_file_range {
464+ let bytes_to_copy = cmp:: min ( max_len - written, usize:: MAX as u64 ) ;
465+ // cap to 2GB chunks in case u64::MAX is passed in as file size and the file has a non-zero offset
466+ // this allows us to copy large chunks without hitting the limit,
467+ // unless someone sets a file offset close to u64::MAX - 2GB, in which case the fallback would kick in
468+ let bytes_to_copy = cmp:: min ( bytes_to_copy as usize , 0x8000_0000usize ) ;
469+ let copy_result = unsafe {
470+ // We actually don't have to adjust the offsets,
471+ // because copy_file_range adjusts the file offset automatically
472+ cvt ( copy_file_range (
473+ reader,
474+ ptr:: null_mut ( ) ,
475+ writer,
476+ ptr:: null_mut ( ) ,
477+ bytes_to_copy,
478+ 0 ,
479+ ) )
480+ } ;
481+ if let Err ( ref copy_err) = copy_result {
482+ match copy_err. raw_os_error ( ) {
483+ Some ( libc:: ENOSYS | libc:: EPERM | libc:: EOPNOTSUPP ) => {
484+ HAS_COPY_FILE_RANGE . store ( false , Ordering :: Relaxed ) ;
485+ }
486+ _ => { }
487+ }
488+ }
489+ copy_result
490+ } else {
491+ Err ( Error :: from_raw_os_error ( libc:: ENOSYS ) )
492+ } ;
493+ match copy_result {
494+ Ok ( 0 ) if written == 0 => {
495+ // fallback to work around several kernel bugs where copy_file_range will fail to
496+ // copy any bytes and return 0 instead of an error if
497+ // - reading virtual files from the proc filesystem which appear to have 0 size
498+ // but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
499+ // - copying from an overlay filesystem in docker. reported to occur on fedora 32.
500+ return CopyResult :: Fallback ( 0 ) ;
501+ }
502+ Ok ( 0 ) => return CopyResult :: Ended ( Ok ( written) ) , // reached EOF
503+ Ok ( ret) => written += ret as u64 ,
504+ Err ( err) => {
505+ return match err. raw_os_error ( ) {
506+ // when file offset + max_length > u64::MAX
507+ Some ( libc:: EOVERFLOW ) => CopyResult :: Fallback ( written) ,
508+ Some (
509+ libc:: ENOSYS | libc:: EXDEV | libc:: EINVAL | libc:: EPERM | libc:: EOPNOTSUPP ,
510+ ) => {
511+ // Try fallback io::copy if either:
512+ // - Kernel version is < 4.5 (ENOSYS)
513+ // - Files are mounted on different fs (EXDEV)
514+ // - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
515+ // - copy_file_range is disallowed, for example by seccomp (EPERM)
516+ // - copy_file_range cannot be used with pipes or device nodes (EINVAL)
517+ assert_eq ! ( written, 0 ) ;
518+ CopyResult :: Fallback ( 0 )
519+ }
520+ _ => CopyResult :: Ended ( Err ( err) ) ,
521+ } ;
522+ }
523+ }
524+ }
525+ CopyResult :: Ended ( Ok ( written) )
526+ }
527+
/// Selects which syscall `sendfile_splice` uses to move the data.
#[derive(PartialEq)]
enum SpliceMode {
    /// Transfer the data with `libc::sendfile`.
    Sendfile,
    /// Transfer the data with `libc::splice`.
    Splice,
}
533+
534+ /// performs splice or sendfile between file descriptors
535+ /// Does _not_ fall back to a generic copy loop.
536+ fn sendfile_splice ( mode : SpliceMode , reader : RawFd , writer : RawFd , len : u64 ) -> CopyResult {
537+ let mut written = 0u64 ;
538+ while written < len {
539+ let chunk_size = crate :: cmp:: min ( len - written, 0x7ffff000_u64 ) as usize ;
540+
541+ let result = match mode {
542+ SpliceMode :: Sendfile => {
543+ cvt ( unsafe { libc:: sendfile ( writer, reader, ptr:: null_mut ( ) , chunk_size) } )
544+ }
545+ SpliceMode :: Splice => cvt ( unsafe {
546+ libc:: splice ( reader, ptr:: null_mut ( ) , writer, ptr:: null_mut ( ) , chunk_size, 0 )
547+ } ) ,
548+ } ;
549+
550+ match result {
551+ Ok ( 0 ) => break , // EOF
552+ Ok ( ret) => written += ret as u64 ,
553+ Err ( err) => {
554+ return match err. raw_os_error ( ) {
555+ Some ( os_err) if os_err == libc:: EINVAL => {
556+ // splice/sendfile do not support this particular file descritor (EINVAL)
557+ assert_eq ! ( written, 0 ) ;
558+ CopyResult :: Fallback ( 0 )
559+ }
560+ Some ( os_err) if mode == SpliceMode :: Sendfile && os_err == libc:: EOVERFLOW => {
561+ CopyResult :: Fallback ( written)
562+ }
563+ _ => CopyResult :: Ended ( Err ( err) ) ,
564+ } ;
565+ }
566+ }
567+ }
568+ CopyResult :: Ended ( Ok ( written) )
569+ }
0 commit comments