1
Fork 0

Auto merge of #136593 - lukas-code:ty-value-perf, r=oli-obk

valtree performance tuning

Summary: This PR makes type checking of code with many type-level constants faster.

After https://github.com/rust-lang/rust/pull/136180 was merged, we observed a small perf regression (https://github.com/rust-lang/rust/pull/136318#issuecomment-2635562821). This happened because that PR introduced additional copies in the fast reject code path for consts, which is very hot for certain crates: 6c1d960d88/compiler/rustc_type_ir/src/fast_reject.rs (L486-L487)

This PR improves the performance again by properly interning the valtrees so that copying and comparing them becomes faster. This will become especially useful with `feature(adt_const_params)`, so the fast reject code doesn't have to do a deep compare of the valtrees.

Note that we can't just compare the interned consts themselves in the fast reject, because sometimes `'static` lifetimes in the type are be replaced with inference variables (due to canonicalization) on one side but not the other.

A less invasive alternative that I considered is simply avoiding copies introduced by https://github.com/rust-lang/rust/pull/136180 and comparing the valtrees it in-place (see commit: 9e91e50ac5 / perf results: https://github.com/rust-lang/rust/pull/136593#issuecomment-2642303245), however that was still measurably slower than interning.

There are some minor regressions in secondary benchmarks: These happen due to changes in memory allocations and seem acceptable to me. The crates that make heavy use of valtrees show no significant changes in memory usage.
This commit is contained in:
bors 2025-02-13 15:27:30 +00:00
commit c241e14650
15 changed files with 159 additions and 130 deletions

View file

@ -2,7 +2,7 @@ use rustc_abi::{BackendRepr, VariantIdx};
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_middle::mir::interpret::{EvalToValTreeResult, GlobalId, ReportedErrorInfo};
use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, ScalarInt, Ty, TyCtxt};
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_middle::{bug, mir};
use rustc_span::DUMMY_SP;
use tracing::{debug, instrument, trace};
@ -21,7 +21,7 @@ use crate::interpret::{
fn branches<'tcx>(
ecx: &CompileTimeInterpCx<'tcx>,
place: &MPlaceTy<'tcx>,
n: usize,
field_count: usize,
variant: Option<VariantIdx>,
num_nodes: &mut usize,
) -> ValTreeCreationResult<'tcx> {
@ -29,30 +29,28 @@ fn branches<'tcx>(
Some(variant) => ecx.project_downcast(place, variant).unwrap(),
None => place.clone(),
};
let variant = variant.map(|variant| Some(ty::ValTree::Leaf(ScalarInt::from(variant.as_u32()))));
debug!(?place, ?variant);
debug!(?place);
let mut fields = Vec::with_capacity(n);
for i in 0..n {
let field = ecx.project_field(&place, i).unwrap();
let valtree = const_to_valtree_inner(ecx, &field, num_nodes)?;
fields.push(Some(valtree));
}
let mut branches = Vec::with_capacity(field_count + variant.is_some() as usize);
// For enums, we prepend their variant index before the variant's fields so we can figure out
// the variant again when just seeing a valtree.
let branches = variant
.into_iter()
.chain(fields.into_iter())
.collect::<Option<Vec<_>>>()
.expect("should have already checked for errors in ValTree creation");
if let Some(variant) = variant {
branches.push(ty::ValTree::from_scalar_int(*ecx.tcx, variant.as_u32().into()));
}
for i in 0..field_count {
let field = ecx.project_field(&place, i).unwrap();
let valtree = const_to_valtree_inner(ecx, &field, num_nodes)?;
branches.push(valtree);
}
// Have to account for ZSTs here
if branches.len() == 0 {
*num_nodes += 1;
}
Ok(ty::ValTree::Branch(ecx.tcx.arena.alloc_from_iter(branches)))
Ok(ty::ValTree::from_branches(*ecx.tcx, branches))
}
#[instrument(skip(ecx), level = "debug")]
@ -70,7 +68,7 @@ fn slice_branches<'tcx>(
elems.push(valtree);
}
Ok(ty::ValTree::Branch(ecx.tcx.arena.alloc_from_iter(elems)))
Ok(ty::ValTree::from_branches(*ecx.tcx, elems))
}
#[instrument(skip(ecx), level = "debug")]
@ -79,6 +77,7 @@ fn const_to_valtree_inner<'tcx>(
place: &MPlaceTy<'tcx>,
num_nodes: &mut usize,
) -> ValTreeCreationResult<'tcx> {
let tcx = *ecx.tcx;
let ty = place.layout.ty;
debug!("ty kind: {:?}", ty.kind());
@ -89,14 +88,14 @@ fn const_to_valtree_inner<'tcx>(
match ty.kind() {
ty::FnDef(..) => {
*num_nodes += 1;
Ok(ty::ValTree::zst())
Ok(ty::ValTree::zst(tcx))
}
ty::Bool | ty::Int(_) | ty::Uint(_) | ty::Float(_) | ty::Char => {
let val = ecx.read_immediate(place).unwrap();
let val = val.to_scalar_int().unwrap();
*num_nodes += 1;
Ok(ty::ValTree::Leaf(val))
Ok(ty::ValTree::from_scalar_int(tcx, val))
}
ty::Pat(base, ..) => {
@ -127,7 +126,7 @@ fn const_to_valtree_inner<'tcx>(
return Err(ValTreeCreationError::NonSupportedType(ty));
};
// It's just a ScalarInt!
Ok(ty::ValTree::Leaf(val))
Ok(ty::ValTree::from_scalar_int(tcx, val))
}
// Technically we could allow function pointers (represented as `ty::Instance`), but this is not guaranteed to
@ -287,16 +286,11 @@ pub fn valtree_to_const_value<'tcx>(
// FIXME: Does this need an example?
match *cv.ty.kind() {
ty::FnDef(..) => {
assert!(cv.valtree.unwrap_branch().is_empty());
assert!(cv.valtree.is_zst());
mir::ConstValue::ZeroSized
}
ty::Bool | ty::Int(_) | ty::Uint(_) | ty::Float(_) | ty::Char | ty::RawPtr(_, _) => {
match cv.valtree {
ty::ValTree::Leaf(scalar_int) => mir::ConstValue::Scalar(Scalar::Int(scalar_int)),
ty::ValTree::Branch(_) => bug!(
"ValTrees for Bool, Int, Uint, Float, Char or RawPtr should have the form ValTree::Leaf"
),
}
mir::ConstValue::Scalar(Scalar::Int(cv.valtree.unwrap_leaf()))
}
ty::Pat(ty, _) => {
let cv = ty::Value { valtree: cv.valtree, ty };