move sendfile/splice/copy_file_range into kernel_copy module
This commit is contained in:
parent
888b1031bc
commit
3dfc377aa1
3 changed files with 150 additions and 153 deletions
|
@ -1195,6 +1195,8 @@ pub fn copy(from: &Path, to: &Path) -> io::Result<u64> {
|
||||||
let max_len = u64::MAX;
|
let max_len = u64::MAX;
|
||||||
let (mut writer, _) = open_to_and_set_permissions(to, reader_metadata)?;
|
let (mut writer, _) = open_to_and_set_permissions(to, reader_metadata)?;
|
||||||
|
|
||||||
|
use super::kernel_copy::{copy_regular_files, CopyResult};
|
||||||
|
|
||||||
match copy_regular_files(reader.as_raw_fd(), writer.as_raw_fd(), max_len) {
|
match copy_regular_files(reader.as_raw_fd(), writer.as_raw_fd(), max_len) {
|
||||||
CopyResult::Ended(result) => result,
|
CopyResult::Ended(result) => result,
|
||||||
CopyResult::Fallback(written) => {
|
CopyResult::Fallback(written) => {
|
||||||
|
@ -1206,155 +1208,6 @@ pub fn copy(from: &Path, to: &Path) -> io::Result<u64> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// linux-specific implementation that will attempt to use copy_file_range for copy offloading
|
|
||||||
/// as the name says, it only works on regular files
|
|
||||||
///
|
|
||||||
/// Callers must handle fallback to a generic copy loop.
|
|
||||||
/// `Fallback` may indicate non-zero number of bytes already written
|
|
||||||
/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
|
|
||||||
/// If the initial file offset was 0 then `Fallback` will only contain `0`.
|
|
||||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
|
||||||
pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult {
|
|
||||||
use crate::cmp;
|
|
||||||
use crate::sync::atomic::{AtomicBool, Ordering};
|
|
||||||
|
|
||||||
// Kernel prior to 4.5 don't have copy_file_range
|
|
||||||
// We store the availability in a global to avoid unnecessary syscalls
|
|
||||||
static HAS_COPY_FILE_RANGE: AtomicBool = AtomicBool::new(true);
|
|
||||||
|
|
||||||
unsafe fn copy_file_range(
|
|
||||||
fd_in: libc::c_int,
|
|
||||||
off_in: *mut libc::loff_t,
|
|
||||||
fd_out: libc::c_int,
|
|
||||||
off_out: *mut libc::loff_t,
|
|
||||||
len: libc::size_t,
|
|
||||||
flags: libc::c_uint,
|
|
||||||
) -> libc::c_long {
|
|
||||||
libc::syscall(libc::SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags)
|
|
||||||
}
|
|
||||||
|
|
||||||
let has_copy_file_range = HAS_COPY_FILE_RANGE.load(Ordering::Relaxed);
|
|
||||||
let mut written = 0u64;
|
|
||||||
while written < max_len {
|
|
||||||
let copy_result = if has_copy_file_range {
|
|
||||||
let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
|
|
||||||
// cap to 2GB chunks in case u64::MAX is passed in as file size and the file has a non-zero offset
|
|
||||||
// this allows us to copy large chunks without hitting the limit,
|
|
||||||
// unless someone sets a file offset close to u64::MAX - 2GB, in which case the fallback would kick in
|
|
||||||
let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x8000_0000usize);
|
|
||||||
let copy_result = unsafe {
|
|
||||||
// We actually don't have to adjust the offsets,
|
|
||||||
// because copy_file_range adjusts the file offset automatically
|
|
||||||
cvt(copy_file_range(
|
|
||||||
reader,
|
|
||||||
ptr::null_mut(),
|
|
||||||
writer,
|
|
||||||
ptr::null_mut(),
|
|
||||||
bytes_to_copy,
|
|
||||||
0,
|
|
||||||
))
|
|
||||||
};
|
|
||||||
if let Err(ref copy_err) = copy_result {
|
|
||||||
match copy_err.raw_os_error() {
|
|
||||||
Some(libc::ENOSYS | libc::EPERM | libc::EOPNOTSUPP) => {
|
|
||||||
HAS_COPY_FILE_RANGE.store(false, Ordering::Relaxed);
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
copy_result
|
|
||||||
} else {
|
|
||||||
Err(io::Error::from_raw_os_error(libc::ENOSYS))
|
|
||||||
};
|
|
||||||
match copy_result {
|
|
||||||
Ok(0) if written == 0 => {
|
|
||||||
// fallback to work around several kernel bugs where copy_file_range will fail to
|
|
||||||
// copy any bytes and return 0 instead of an error if
|
|
||||||
// - reading virtual files from the proc filesystem which appear to have 0 size
|
|
||||||
// but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
|
|
||||||
// - copying from an overlay filesystem in docker. reported to occur on fedora 32.
|
|
||||||
return CopyResult::Fallback(0);
|
|
||||||
}
|
|
||||||
Ok(0) => return CopyResult::Ended(Ok(written)), // reached EOF
|
|
||||||
Ok(ret) => written += ret as u64,
|
|
||||||
Err(err) => {
|
|
||||||
match err.raw_os_error() {
|
|
||||||
// when file offset + max_length > u64::MAX
|
|
||||||
Some(libc::EOVERFLOW) => return CopyResult::Fallback(written),
|
|
||||||
Some(
|
|
||||||
libc::ENOSYS | libc::EXDEV | libc::EINVAL | libc::EPERM | libc::EOPNOTSUPP,
|
|
||||||
) => {
|
|
||||||
// Try fallback io::copy if either:
|
|
||||||
// - Kernel version is < 4.5 (ENOSYS)
|
|
||||||
// - Files are mounted on different fs (EXDEV)
|
|
||||||
// - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
|
|
||||||
// - copy_file_range is disallowed, for example by seccomp (EPERM)
|
|
||||||
// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
|
|
||||||
assert_eq!(written, 0);
|
|
||||||
return CopyResult::Fallback(0);
|
|
||||||
}
|
|
||||||
_ => return CopyResult::Ended(Err(err)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CopyResult::Ended(Ok(written))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(PartialEq)]
|
|
||||||
pub(super) enum SpliceMode {
|
|
||||||
Sendfile,
|
|
||||||
Splice,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) enum CopyResult {
|
|
||||||
Ended(io::Result<u64>),
|
|
||||||
Fallback(u64),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// performs splice or sendfile between file descriptors
|
|
||||||
/// Does _not_ fall back to a generic copy loop.
|
|
||||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
|
||||||
pub(super) fn sendfile_splice(
|
|
||||||
mode: SpliceMode,
|
|
||||||
reader: RawFd,
|
|
||||||
writer: RawFd,
|
|
||||||
len: u64,
|
|
||||||
) -> CopyResult {
|
|
||||||
let mut written = 0u64;
|
|
||||||
while written < len {
|
|
||||||
let chunk_size = crate::cmp::min(len - written, 0x7ffff000_u64) as usize;
|
|
||||||
|
|
||||||
let result = match mode {
|
|
||||||
SpliceMode::Sendfile => {
|
|
||||||
cvt(unsafe { libc::sendfile(writer, reader, ptr::null_mut(), chunk_size) })
|
|
||||||
}
|
|
||||||
SpliceMode::Splice => cvt(unsafe {
|
|
||||||
libc::splice(reader, ptr::null_mut(), writer, ptr::null_mut(), chunk_size, 0)
|
|
||||||
}),
|
|
||||||
};
|
|
||||||
|
|
||||||
match result {
|
|
||||||
Ok(0) => break, // EOF
|
|
||||||
Ok(ret) => written += ret as u64,
|
|
||||||
Err(err) => {
|
|
||||||
match err.raw_os_error() {
|
|
||||||
Some(os_err) if os_err == libc::EINVAL => {
|
|
||||||
// splice/sendfile do not support this particular file descritor (EINVAL)
|
|
||||||
assert_eq!(written, 0);
|
|
||||||
return CopyResult::Fallback(0);
|
|
||||||
}
|
|
||||||
Some(os_err) if mode == SpliceMode::Sendfile && os_err == libc::EOVERFLOW => {
|
|
||||||
return CopyResult::Fallback(written);
|
|
||||||
}
|
|
||||||
_ => return CopyResult::Ended(Err(err)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CopyResult::Ended(Ok(written))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(any(target_os = "macos", target_os = "ios"))]
|
#[cfg(any(target_os = "macos", target_os = "ios"))]
|
||||||
pub fn copy(from: &Path, to: &Path) -> io::Result<u64> {
|
pub fn copy(from: &Path, to: &Path) -> io::Result<u64> {
|
||||||
use crate::sync::atomic::{AtomicBool, Ordering};
|
use crate::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
|
@ -49,14 +49,16 @@ use crate::convert::TryInto;
|
||||||
use crate::fs::{File, Metadata};
|
use crate::fs::{File, Metadata};
|
||||||
use crate::io::copy::generic_copy;
|
use crate::io::copy::generic_copy;
|
||||||
use crate::io::{
|
use crate::io::{
|
||||||
BufRead, BufReader, BufWriter, Read, Result, StderrLock, StdinLock, StdoutLock, Take, Write,
|
BufRead, BufReader, BufWriter, Error, Read, Result, StderrLock, StdinLock, StdoutLock, Take,
|
||||||
|
Write,
|
||||||
};
|
};
|
||||||
use crate::mem::ManuallyDrop;
|
use crate::mem::ManuallyDrop;
|
||||||
use crate::net::TcpStream;
|
use crate::net::TcpStream;
|
||||||
use crate::os::unix::fs::FileTypeExt;
|
use crate::os::unix::fs::FileTypeExt;
|
||||||
use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
||||||
use crate::process::{ChildStderr, ChildStdin, ChildStdout};
|
use crate::process::{ChildStderr, ChildStdin, ChildStdout};
|
||||||
use crate::sys::fs::{copy_regular_files, sendfile_splice, CopyResult, SpliceMode};
|
use crate::ptr;
|
||||||
|
use crate::sys::cvt;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
@ -423,3 +425,145 @@ fn fd_to_meta<T: AsRawFd>(fd: &T) -> FdMeta {
|
||||||
Err(_) => FdMeta::NoneObtained,
|
Err(_) => FdMeta::NoneObtained,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) enum CopyResult {
|
||||||
|
Ended(Result<u64>),
|
||||||
|
Fallback(u64),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// linux-specific implementation that will attempt to use copy_file_range for copy offloading
|
||||||
|
/// as the name says, it only works on regular files
|
||||||
|
///
|
||||||
|
/// Callers must handle fallback to a generic copy loop.
|
||||||
|
/// `Fallback` may indicate non-zero number of bytes already written
|
||||||
|
/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
|
||||||
|
/// If the initial file offset was 0 then `Fallback` will only contain `0`.
|
||||||
|
pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult {
|
||||||
|
use crate::cmp;
|
||||||
|
use crate::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
|
// Kernel prior to 4.5 don't have copy_file_range
|
||||||
|
// We store the availability in a global to avoid unnecessary syscalls
|
||||||
|
static HAS_COPY_FILE_RANGE: AtomicBool = AtomicBool::new(true);
|
||||||
|
|
||||||
|
unsafe fn copy_file_range(
|
||||||
|
fd_in: libc::c_int,
|
||||||
|
off_in: *mut libc::loff_t,
|
||||||
|
fd_out: libc::c_int,
|
||||||
|
off_out: *mut libc::loff_t,
|
||||||
|
len: libc::size_t,
|
||||||
|
flags: libc::c_uint,
|
||||||
|
) -> libc::c_long {
|
||||||
|
libc::syscall(libc::SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
let has_copy_file_range = HAS_COPY_FILE_RANGE.load(Ordering::Relaxed);
|
||||||
|
let mut written = 0u64;
|
||||||
|
while written < max_len {
|
||||||
|
let copy_result = if has_copy_file_range {
|
||||||
|
let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
|
||||||
|
// cap to 2GB chunks in case u64::MAX is passed in as file size and the file has a non-zero offset
|
||||||
|
// this allows us to copy large chunks without hitting the limit,
|
||||||
|
// unless someone sets a file offset close to u64::MAX - 2GB, in which case the fallback would kick in
|
||||||
|
let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x8000_0000usize);
|
||||||
|
let copy_result = unsafe {
|
||||||
|
// We actually don't have to adjust the offsets,
|
||||||
|
// because copy_file_range adjusts the file offset automatically
|
||||||
|
cvt(copy_file_range(
|
||||||
|
reader,
|
||||||
|
ptr::null_mut(),
|
||||||
|
writer,
|
||||||
|
ptr::null_mut(),
|
||||||
|
bytes_to_copy,
|
||||||
|
0,
|
||||||
|
))
|
||||||
|
};
|
||||||
|
if let Err(ref copy_err) = copy_result {
|
||||||
|
match copy_err.raw_os_error() {
|
||||||
|
Some(libc::ENOSYS | libc::EPERM | libc::EOPNOTSUPP) => {
|
||||||
|
HAS_COPY_FILE_RANGE.store(false, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
copy_result
|
||||||
|
} else {
|
||||||
|
Err(Error::from_raw_os_error(libc::ENOSYS))
|
||||||
|
};
|
||||||
|
match copy_result {
|
||||||
|
Ok(0) if written == 0 => {
|
||||||
|
// fallback to work around several kernel bugs where copy_file_range will fail to
|
||||||
|
// copy any bytes and return 0 instead of an error if
|
||||||
|
// - reading virtual files from the proc filesystem which appear to have 0 size
|
||||||
|
// but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
|
||||||
|
// - copying from an overlay filesystem in docker. reported to occur on fedora 32.
|
||||||
|
return CopyResult::Fallback(0);
|
||||||
|
}
|
||||||
|
Ok(0) => return CopyResult::Ended(Ok(written)), // reached EOF
|
||||||
|
Ok(ret) => written += ret as u64,
|
||||||
|
Err(err) => {
|
||||||
|
return match err.raw_os_error() {
|
||||||
|
// when file offset + max_length > u64::MAX
|
||||||
|
Some(libc::EOVERFLOW) => CopyResult::Fallback(written),
|
||||||
|
Some(
|
||||||
|
libc::ENOSYS | libc::EXDEV | libc::EINVAL | libc::EPERM | libc::EOPNOTSUPP,
|
||||||
|
) => {
|
||||||
|
// Try fallback io::copy if either:
|
||||||
|
// - Kernel version is < 4.5 (ENOSYS)
|
||||||
|
// - Files are mounted on different fs (EXDEV)
|
||||||
|
// - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
|
||||||
|
// - copy_file_range is disallowed, for example by seccomp (EPERM)
|
||||||
|
// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
|
||||||
|
assert_eq!(written, 0);
|
||||||
|
CopyResult::Fallback(0)
|
||||||
|
}
|
||||||
|
_ => CopyResult::Ended(Err(err)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CopyResult::Ended(Ok(written))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq)]
|
||||||
|
enum SpliceMode {
|
||||||
|
Sendfile,
|
||||||
|
Splice,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// performs splice or sendfile between file descriptors
|
||||||
|
/// Does _not_ fall back to a generic copy loop.
|
||||||
|
fn sendfile_splice(mode: SpliceMode, reader: RawFd, writer: RawFd, len: u64) -> CopyResult {
|
||||||
|
let mut written = 0u64;
|
||||||
|
while written < len {
|
||||||
|
let chunk_size = crate::cmp::min(len - written, 0x7ffff000_u64) as usize;
|
||||||
|
|
||||||
|
let result = match mode {
|
||||||
|
SpliceMode::Sendfile => {
|
||||||
|
cvt(unsafe { libc::sendfile(writer, reader, ptr::null_mut(), chunk_size) })
|
||||||
|
}
|
||||||
|
SpliceMode::Splice => cvt(unsafe {
|
||||||
|
libc::splice(reader, ptr::null_mut(), writer, ptr::null_mut(), chunk_size, 0)
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(0) => break, // EOF
|
||||||
|
Ok(ret) => written += ret as u64,
|
||||||
|
Err(err) => {
|
||||||
|
return match err.raw_os_error() {
|
||||||
|
Some(os_err) if os_err == libc::EINVAL => {
|
||||||
|
// splice/sendfile do not support this particular file descritor (EINVAL)
|
||||||
|
assert_eq!(written, 0);
|
||||||
|
CopyResult::Fallback(0)
|
||||||
|
}
|
||||||
|
Some(os_err) if mode == SpliceMode::Sendfile && os_err == libc::EOVERFLOW => {
|
||||||
|
CopyResult::Fallback(written)
|
||||||
|
}
|
||||||
|
_ => CopyResult::Ended(Err(err)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CopyResult::Ended(Ok(written))
|
||||||
|
}
|
||||||
|
|
|
@ -159,8 +159,8 @@ fn bench_socket_pipe_socket_copy(b: &mut test::Bencher) {
|
||||||
let local_source = local_end.clone();
|
let local_source = local_end.clone();
|
||||||
crate::thread::spawn(move || {
|
crate::thread::spawn(move || {
|
||||||
loop {
|
loop {
|
||||||
crate::sys::fs::sendfile_splice(
|
super::sendfile_splice(
|
||||||
crate::sys::fs::SpliceMode::Splice,
|
super::SpliceMode::Splice,
|
||||||
local_source.as_raw_fd(),
|
local_source.as_raw_fd(),
|
||||||
write_end.as_raw_fd(),
|
write_end.as_raw_fd(),
|
||||||
u64::MAX,
|
u64::MAX,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue