Auto merge of #97925 - the8472:cgroupv1, r=joshtriplett
Add cgroupv1 support to available_parallelism Fixes #97549 My dev machine uses cgroup v2 so I was only able to test that code path. So the v1 code path is written only based on documentation. I could use some help testing that it works on a machine with cgroups v1: ``` $ x.py build --stage 1 # quota.rs fn main() { println!("{:?}", std:🧵:available_parallelism()); } # assuming stage1 is linked in rustup $ rust +stage1 quota.rs # spawn a new cgroup scope for the current user $ sudo systemd-run -p CPUQuota="300%" --uid=$(id -u) -tdS # should print Ok(3) $ ./quota ``` If it doesn't work as expected an strace, the contents of `/proc/self/cgroups` and the structure of `/sys/fs/cgroups` would help.
This commit is contained in:
commit
e55c53c57e
3 changed files with 195 additions and 46 deletions
|
@ -276,6 +276,7 @@
|
||||||
#![feature(hasher_prefixfree_extras)]
|
#![feature(hasher_prefixfree_extras)]
|
||||||
#![feature(hashmap_internals)]
|
#![feature(hashmap_internals)]
|
||||||
#![feature(int_error_internals)]
|
#![feature(int_error_internals)]
|
||||||
|
#![feature(is_some_with)]
|
||||||
#![feature(maybe_uninit_slice)]
|
#![feature(maybe_uninit_slice)]
|
||||||
#![feature(maybe_uninit_write_slice)]
|
#![feature(maybe_uninit_write_slice)]
|
||||||
#![feature(mixed_integer_ops)]
|
#![feature(mixed_integer_ops)]
|
||||||
|
|
|
@ -285,7 +285,7 @@ pub fn available_parallelism() -> io::Result<NonZeroUsize> {
|
||||||
))] {
|
))] {
|
||||||
#[cfg(any(target_os = "android", target_os = "linux"))]
|
#[cfg(any(target_os = "android", target_os = "linux"))]
|
||||||
{
|
{
|
||||||
let quota = cgroup2_quota().max(1);
|
let quota = cgroups::quota().max(1);
|
||||||
let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
|
let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
|
||||||
unsafe {
|
unsafe {
|
||||||
if libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) == 0 {
|
if libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) == 0 {
|
||||||
|
@ -379,49 +379,88 @@ pub fn available_parallelism() -> io::Result<NonZeroUsize> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns cgroup CPU quota in core-equivalents, rounded down, or usize::MAX if the quota cannot
|
|
||||||
/// be determined or is not set.
|
|
||||||
#[cfg(any(target_os = "android", target_os = "linux"))]
|
#[cfg(any(target_os = "android", target_os = "linux"))]
|
||||||
fn cgroup2_quota() -> usize {
|
mod cgroups {
|
||||||
|
//! Currently not covered
|
||||||
|
//! * cgroup v2 in non-standard mountpoints
|
||||||
|
//! * paths containing control characters or spaces, since those would be escaped in procfs
|
||||||
|
//! output and we don't unescape
|
||||||
|
use crate::borrow::Cow;
|
||||||
use crate::ffi::OsString;
|
use crate::ffi::OsString;
|
||||||
use crate::fs::{try_exists, File};
|
use crate::fs::{try_exists, File};
|
||||||
use crate::io::Read;
|
use crate::io::Read;
|
||||||
|
use crate::io::{BufRead, BufReader};
|
||||||
use crate::os::unix::ffi::OsStringExt;
|
use crate::os::unix::ffi::OsStringExt;
|
||||||
|
use crate::path::Path;
|
||||||
use crate::path::PathBuf;
|
use crate::path::PathBuf;
|
||||||
|
use crate::str::from_utf8;
|
||||||
|
|
||||||
let mut quota = usize::MAX;
|
#[derive(PartialEq)]
|
||||||
if cfg!(miri) {
|
enum Cgroup {
|
||||||
// Attempting to open a file fails under default flags due to isolation.
|
V1,
|
||||||
// And Miri does not have parallelism anyway.
|
V2,
|
||||||
return quota;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let _: Option<()> = try {
|
/// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
|
||||||
let mut buf = Vec::with_capacity(128);
|
/// be determined or is not set.
|
||||||
// find our place in the cgroup hierarchy
|
pub(super) fn quota() -> usize {
|
||||||
File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
|
let mut quota = usize::MAX;
|
||||||
let cgroup_path = buf
|
if cfg!(miri) {
|
||||||
.split(|&c| c == b'\n')
|
// Attempting to open a file fails under default flags due to isolation.
|
||||||
.filter_map(|line| {
|
// And Miri does not have parallelism anyway.
|
||||||
let mut fields = line.splitn(3, |&c| c == b':');
|
return quota;
|
||||||
// expect cgroupv2 which has an empty 2nd field
|
}
|
||||||
if fields.nth(1) != Some(b"") {
|
|
||||||
return None;
|
let _: Option<()> = try {
|
||||||
}
|
let mut buf = Vec::with_capacity(128);
|
||||||
let path = fields.last()?;
|
// find our place in the cgroup hierarchy
|
||||||
// skip leading slash
|
File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
|
||||||
Some(path[1..].to_owned())
|
let (cgroup_path, version) =
|
||||||
})
|
buf.split(|&c| c == b'\n').fold(None, |previous, line| {
|
||||||
.next()?;
|
let mut fields = line.splitn(3, |&c| c == b':');
|
||||||
let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
|
// 2nd field is a list of controllers for v1 or empty for v2
|
||||||
|
let version = match fields.nth(1) {
|
||||||
|
Some(b"") => Cgroup::V2,
|
||||||
|
Some(controllers)
|
||||||
|
if from_utf8(controllers)
|
||||||
|
.is_ok_and(|c| c.split(",").any(|c| c == "cpu")) =>
|
||||||
|
{
|
||||||
|
Cgroup::V1
|
||||||
|
}
|
||||||
|
_ => return previous,
|
||||||
|
};
|
||||||
|
|
||||||
|
// already-found v1 trumps v2 since it explicitly specifies its controllers
|
||||||
|
if previous.is_some() && version == Cgroup::V2 {
|
||||||
|
return previous;
|
||||||
|
}
|
||||||
|
|
||||||
|
let path = fields.last()?;
|
||||||
|
// skip leading slash
|
||||||
|
Some((path[1..].to_owned(), version))
|
||||||
|
})?;
|
||||||
|
let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
|
||||||
|
|
||||||
|
quota = match version {
|
||||||
|
Cgroup::V1 => quota_v1(cgroup_path),
|
||||||
|
Cgroup::V2 => quota_v2(cgroup_path),
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
quota
|
||||||
|
}
|
||||||
|
|
||||||
|
fn quota_v2(group_path: PathBuf) -> usize {
|
||||||
|
let mut quota = usize::MAX;
|
||||||
|
|
||||||
let mut path = PathBuf::with_capacity(128);
|
let mut path = PathBuf::with_capacity(128);
|
||||||
let mut read_buf = String::with_capacity(20);
|
let mut read_buf = String::with_capacity(20);
|
||||||
|
|
||||||
|
// standard mount location defined in file-hierarchy(7) manpage
|
||||||
let cgroup_mount = "/sys/fs/cgroup";
|
let cgroup_mount = "/sys/fs/cgroup";
|
||||||
|
|
||||||
path.push(cgroup_mount);
|
path.push(cgroup_mount);
|
||||||
path.push(&cgroup_path);
|
path.push(&group_path);
|
||||||
|
|
||||||
path.push("cgroup.controllers");
|
path.push("cgroup.controllers");
|
||||||
|
|
||||||
|
@ -432,30 +471,134 @@ fn cgroup2_quota() -> usize {
|
||||||
|
|
||||||
path.pop();
|
path.pop();
|
||||||
|
|
||||||
while path.starts_with(cgroup_mount) {
|
let _: Option<()> = try {
|
||||||
path.push("cpu.max");
|
while path.starts_with(cgroup_mount) {
|
||||||
|
path.push("cpu.max");
|
||||||
|
|
||||||
read_buf.clear();
|
read_buf.clear();
|
||||||
|
|
||||||
if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
|
if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
|
||||||
let raw_quota = read_buf.lines().next()?;
|
let raw_quota = read_buf.lines().next()?;
|
||||||
let mut raw_quota = raw_quota.split(' ');
|
let mut raw_quota = raw_quota.split(' ');
|
||||||
let limit = raw_quota.next()?;
|
let limit = raw_quota.next()?;
|
||||||
let period = raw_quota.next()?;
|
let period = raw_quota.next()?;
|
||||||
match (limit.parse::<usize>(), period.parse::<usize>()) {
|
match (limit.parse::<usize>(), period.parse::<usize>()) {
|
||||||
(Ok(limit), Ok(period)) => {
|
(Ok(limit), Ok(period)) => {
|
||||||
quota = quota.min(limit / period);
|
quota = quota.min(limit / period);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
_ => {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
path.pop(); // pop filename
|
||||||
|
path.pop(); // pop dir
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
quota
|
||||||
|
}
|
||||||
|
|
||||||
|
fn quota_v1(group_path: PathBuf) -> usize {
|
||||||
|
let mut quota = usize::MAX;
|
||||||
|
let mut path = PathBuf::with_capacity(128);
|
||||||
|
let mut read_buf = String::with_capacity(20);
|
||||||
|
|
||||||
|
// Hardcode commonly used locations mentioned in the cgroups(7) manpage
|
||||||
|
// if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
|
||||||
|
let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
|
||||||
|
|p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
|
||||||
|
|p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
|
||||||
|
// this can be expensive on systems with tons of mountpoints
|
||||||
|
// but we only get to this point when /proc/self/cgroups explicitly indicated
|
||||||
|
// this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
|
||||||
|
find_mountpoint,
|
||||||
|
];
|
||||||
|
|
||||||
|
for mount in mounts {
|
||||||
|
let Some((mount, group_path)) = mount(&group_path) else { continue };
|
||||||
|
|
||||||
|
path.clear();
|
||||||
|
path.push(mount.as_ref());
|
||||||
|
path.push(&group_path);
|
||||||
|
|
||||||
|
// skip if we guessed the mount incorrectly
|
||||||
|
if matches!(try_exists(&path), Err(_) | Ok(false)) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
path.pop(); // pop filename
|
while path.starts_with(mount.as_ref()) {
|
||||||
path.pop(); // pop dir
|
let mut parse_file = |name| {
|
||||||
}
|
path.push(name);
|
||||||
};
|
read_buf.clear();
|
||||||
|
|
||||||
quota
|
let f = File::open(&path);
|
||||||
|
path.pop(); // restore buffer before any early returns
|
||||||
|
f.ok()?.read_to_string(&mut read_buf).ok()?;
|
||||||
|
let parsed = read_buf.trim().parse::<usize>().ok()?;
|
||||||
|
|
||||||
|
Some(parsed)
|
||||||
|
};
|
||||||
|
|
||||||
|
let limit = parse_file("cpu.cfs_quota_us");
|
||||||
|
let period = parse_file("cpu.cfs_period_us");
|
||||||
|
|
||||||
|
match (limit, period) {
|
||||||
|
(Some(limit), Some(period)) => quota = quota.min(limit / period),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
path.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
// we passed the try_exists above so we should have traversed the correct hierarchy
|
||||||
|
// when reaching this line
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
quota
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
|
||||||
|
///
|
||||||
|
/// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
|
||||||
|
/// over the already-included prefix
|
||||||
|
fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
|
||||||
|
let mut reader = BufReader::new(File::open("/proc/self/mountinfo").ok()?);
|
||||||
|
let mut line = String::with_capacity(256);
|
||||||
|
loop {
|
||||||
|
line.clear();
|
||||||
|
if reader.read_line(&mut line).ok()? == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let line = line.trim();
|
||||||
|
let mut items = line.split(' ');
|
||||||
|
|
||||||
|
let sub_path = items.nth(3)?;
|
||||||
|
let mount_point = items.next()?;
|
||||||
|
let mount_opts = items.next_back()?;
|
||||||
|
let filesystem_type = items.nth_back(1)?;
|
||||||
|
|
||||||
|
if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
|
||||||
|
// not a cgroup / not a cpu-controller
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
|
||||||
|
|
||||||
|
if !group_path.starts_with(sub_path) {
|
||||||
|
// this is a bind-mount and the bound subdirectory
|
||||||
|
// does not contain the cgroup this process belongs to
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
|
||||||
|
|
||||||
|
return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(
|
#[cfg(all(
|
||||||
|
|
|
@ -1577,10 +1577,15 @@ fn _assert_sync_and_send() {
|
||||||
///
|
///
|
||||||
/// On Linux:
|
/// On Linux:
|
||||||
/// - It may overcount the amount of parallelism available when limited by a
|
/// - It may overcount the amount of parallelism available when limited by a
|
||||||
/// process-wide affinity mask or cgroup quotas and cgroup2 fs or `sched_getaffinity()` can't be
|
/// process-wide affinity mask or cgroup quotas and `sched_getaffinity()` or cgroup fs can't be
|
||||||
/// queried, e.g. due to sandboxing.
|
/// queried, e.g. due to sandboxing.
|
||||||
/// - It may undercount the amount of parallelism if the current thread's affinity mask
|
/// - It may undercount the amount of parallelism if the current thread's affinity mask
|
||||||
/// does not reflect the process' cpuset, e.g. due to pinned threads.
|
/// does not reflect the process' cpuset, e.g. due to pinned threads.
|
||||||
|
/// - If the process is in a cgroup v1 cpu controller, this may need to
|
||||||
|
/// scan mountpoints to find the corresponding cgroup v1 controller,
|
||||||
|
/// which may take time on systems with large numbers of mountpoints.
|
||||||
|
/// (This does not apply to cgroup v2, or to processes not in a
|
||||||
|
/// cgroup.)
|
||||||
///
|
///
|
||||||
/// On all targets:
|
/// On all targets:
|
||||||
/// - It may overcount the amount of parallelism available when running in a VM
|
/// - It may overcount the amount of parallelism available when running in a VM
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue