Auto merge of #113777 - nnethercote:overlap-based-cgu-merging, r=pnkfelix
Inline overlap based CGU merging: introduce a new CGU merging algorithm that aims to minimize the number of duplicated inlined items. r? `@wesleywiser`
This commit is contained in:
commit
0d6a9b2bf7
2 changed files with 103 additions and 37 deletions
|
@ -56,6 +56,8 @@ impl<'tcx> MonoItem<'tcx> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note: if you change how item size estimates work, you might need to
|
||||||
|
// change NON_INCR_MIN_CGU_SIZE as well.
|
||||||
pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize {
|
pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize {
|
||||||
match *self {
|
match *self {
|
||||||
MonoItem::Fn(instance) => {
|
MonoItem::Fn(instance) => {
|
||||||
|
@ -248,8 +250,14 @@ pub struct CodegenUnit<'tcx> {
|
||||||
/// Auxiliary info about a `MonoItem`.
|
/// Auxiliary info about a `MonoItem`.
|
||||||
#[derive(Copy, Clone, PartialEq, Debug, HashStable)]
|
#[derive(Copy, Clone, PartialEq, Debug, HashStable)]
|
||||||
pub struct MonoItemData {
|
pub struct MonoItemData {
|
||||||
|
/// A cached copy of the result of `MonoItem::instantiation_mode`, where
|
||||||
|
/// `GloballyShared` maps to `false` and `LocalCopy` maps to `true`.
|
||||||
|
pub inlined: bool,
|
||||||
|
|
||||||
pub linkage: Linkage,
|
pub linkage: Linkage,
|
||||||
pub visibility: Visibility,
|
pub visibility: Visibility,
|
||||||
|
|
||||||
|
/// A cached copy of the result of `MonoItem::size_estimate`.
|
||||||
pub size_estimate: usize,
|
pub size_estimate: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -248,7 +248,8 @@ where
|
||||||
}
|
}
|
||||||
let size_estimate = mono_item.size_estimate(cx.tcx);
|
let size_estimate = mono_item.size_estimate(cx.tcx);
|
||||||
|
|
||||||
cgu.items_mut().insert(mono_item, MonoItemData { linkage, visibility, size_estimate });
|
cgu.items_mut()
|
||||||
|
.insert(mono_item, MonoItemData { inlined: false, linkage, visibility, size_estimate });
|
||||||
|
|
||||||
// Get all inlined items that are reachable from `mono_item` without
|
// Get all inlined items that are reachable from `mono_item` without
|
||||||
// going via another root item. This includes drop-glue, functions from
|
// going via another root item. This includes drop-glue, functions from
|
||||||
|
@ -263,6 +264,7 @@ where
|
||||||
for inlined_item in reachable_inlined_items {
|
for inlined_item in reachable_inlined_items {
|
||||||
// This is a CGU-private copy.
|
// This is a CGU-private copy.
|
||||||
cgu.items_mut().entry(inlined_item).or_insert_with(|| MonoItemData {
|
cgu.items_mut().entry(inlined_item).or_insert_with(|| MonoItemData {
|
||||||
|
inlined: true,
|
||||||
linkage: Linkage::Internal,
|
linkage: Linkage::Internal,
|
||||||
visibility: Visibility::Default,
|
visibility: Visibility::Default,
|
||||||
size_estimate: inlined_item.size_estimate(cx.tcx),
|
size_estimate: inlined_item.size_estimate(cx.tcx),
|
||||||
|
@ -316,6 +318,60 @@ fn merge_codegen_units<'tcx>(
|
||||||
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
|
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
|
||||||
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
|
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
|
||||||
|
|
||||||
|
// If N is the maximum number of CGUs, and the CGUs are sorted from largest
|
||||||
|
// to smallest, we repeatedly find which CGU in codegen_units[N..] has the
|
||||||
|
// greatest overlap of inlined items with codegen_units[N-1], merge that
|
||||||
|
// CGU into codegen_units[N-1], then re-sort by size and repeat.
|
||||||
|
//
|
||||||
|
// We use inlined item overlap to guide this merging because it minimizes
|
||||||
|
// duplication of inlined items, which makes LLVM be faster and generate
|
||||||
|
// better and smaller machine code.
|
||||||
|
//
|
||||||
|
// Why merge into codegen_units[N-1]? We want CGUs to have similar sizes,
|
||||||
|
// which means we don't want codegen_units[0..N] (the already big ones)
|
||||||
|
// getting any bigger, if we can avoid it. When we have more than N CGUs
|
||||||
|
// then at least one of the biggest N will have to grow. codegen_units[N-1]
|
||||||
|
// is the smallest of those, and so has the most room to grow.
|
||||||
|
let max_codegen_units = cx.tcx.sess.codegen_units().as_usize();
|
||||||
|
while codegen_units.len() > max_codegen_units {
|
||||||
|
// Sort small CGUs to the back.
|
||||||
|
codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
|
||||||
|
|
||||||
|
let cgu_dst = &codegen_units[max_codegen_units - 1];
|
||||||
|
|
||||||
|
// Find the CGU that overlaps the most with `cgu_dst`. In the case of a
|
||||||
|
// tie, favour the earlier (bigger) CGU.
|
||||||
|
let mut max_overlap = 0;
|
||||||
|
let mut max_overlap_i = max_codegen_units;
|
||||||
|
for (i, cgu_src) in codegen_units.iter().enumerate().skip(max_codegen_units) {
|
||||||
|
if cgu_src.size_estimate() <= max_overlap {
|
||||||
|
// None of the remaining overlaps can exceed `max_overlap`, so
|
||||||
|
// stop looking.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let overlap = compute_inlined_overlap(cgu_dst, cgu_src);
|
||||||
|
if overlap > max_overlap {
|
||||||
|
max_overlap = overlap;
|
||||||
|
max_overlap_i = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut cgu_src = codegen_units.swap_remove(max_overlap_i);
|
||||||
|
let cgu_dst = &mut codegen_units[max_codegen_units - 1];
|
||||||
|
|
||||||
|
// Move the items from `cgu_src` to `cgu_dst`. Some of them may be
|
||||||
|
// duplicate inlined items, in which case the destination CGU is
|
||||||
|
// unaffected. Recalculate size estimates afterwards.
|
||||||
|
cgu_dst.items_mut().extend(cgu_src.items_mut().drain());
|
||||||
|
cgu_dst.compute_size_estimate();
|
||||||
|
|
||||||
|
// Record that `cgu_dst` now contains all the stuff that was in
|
||||||
|
// `cgu_src` before.
|
||||||
|
let mut consumed_cgu_names = cgu_contents.remove(&cgu_src.name()).unwrap();
|
||||||
|
cgu_contents.get_mut(&cgu_dst.name()).unwrap().append(&mut consumed_cgu_names);
|
||||||
|
}
|
||||||
|
|
||||||
// Having multiple CGUs can drastically speed up compilation. But for
|
// Having multiple CGUs can drastically speed up compilation. But for
|
||||||
// non-incremental builds, tiny CGUs slow down compilation *and* result in
|
// non-incremental builds, tiny CGUs slow down compilation *and* result in
|
||||||
// worse generated code. So we don't allow CGUs smaller than this (unless
|
// worse generated code. So we don't allow CGUs smaller than this (unless
|
||||||
|
@ -323,24 +379,22 @@ fn merge_codegen_units<'tcx>(
|
||||||
// common in larger programs, so this isn't all that large.
|
// common in larger programs, so this isn't all that large.
|
||||||
const NON_INCR_MIN_CGU_SIZE: usize = 1800;
|
const NON_INCR_MIN_CGU_SIZE: usize = 1800;
|
||||||
|
|
||||||
// Repeatedly merge the two smallest codegen units as long as:
|
// Repeatedly merge the two smallest codegen units as long as: it's a
|
||||||
// - we have more CGUs than the upper limit, or
|
// non-incremental build, and the user didn't specify a CGU count, and
|
||||||
// - (Non-incremental builds only) the user didn't specify a CGU count, and
|
// there are multiple CGUs, and some are below the minimum size.
|
||||||
// there are multiple CGUs, and some are below the minimum size.
|
|
||||||
//
|
//
|
||||||
// The "didn't specify a CGU count" condition is because when an explicit
|
// The "didn't specify a CGU count" condition is because when an explicit
|
||||||
// count is requested we observe it as closely as possible. For example,
|
// count is requested we observe it as closely as possible. For example,
|
||||||
// the `compiler_builtins` crate sets `codegen-units = 10000` and it's
|
// the `compiler_builtins` crate sets `codegen-units = 10000` and it's
|
||||||
// critical they aren't merged. Also, some tests use explicit small values
|
// critical they aren't merged. Also, some tests use explicit small values
|
||||||
// and likewise won't work if small CGUs are merged.
|
// and likewise won't work if small CGUs are merged.
|
||||||
while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize()
|
while cx.tcx.sess.opts.incremental.is_none()
|
||||||
|| (cx.tcx.sess.opts.incremental.is_none()
|
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
|
||||||
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
|
&& codegen_units.len() > 1
|
||||||
&& codegen_units.len() > 1
|
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE)
|
||||||
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE))
|
|
||||||
{
|
{
|
||||||
// Sort small cgus to the back.
|
// Sort small cgus to the back.
|
||||||
codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
|
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
|
||||||
|
|
||||||
let mut smallest = codegen_units.pop().unwrap();
|
let mut smallest = codegen_units.pop().unwrap();
|
||||||
let second_smallest = codegen_units.last_mut().unwrap();
|
let second_smallest = codegen_units.last_mut().unwrap();
|
||||||
|
@ -351,16 +405,7 @@ fn merge_codegen_units<'tcx>(
|
||||||
second_smallest.items_mut().extend(smallest.items_mut().drain());
|
second_smallest.items_mut().extend(smallest.items_mut().drain());
|
||||||
second_smallest.compute_size_estimate();
|
second_smallest.compute_size_estimate();
|
||||||
|
|
||||||
// Record that `second_smallest` now contains all the stuff that was
|
// Don't update `cgu_contents`, that's only for incremental builds.
|
||||||
// in `smallest` before.
|
|
||||||
let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
|
|
||||||
cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"CodegenUnit {} merged into CodegenUnit {}",
|
|
||||||
smallest.name(),
|
|
||||||
second_smallest.name()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
|
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
|
||||||
|
@ -439,6 +484,25 @@ fn merge_codegen_units<'tcx>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compute the combined size of all inlined items that appear in both `cgu1`
|
||||||
|
/// and `cgu2`.
///
/// Only entries whose `MonoItemData::inlined` flag is set are counted, and
/// each shared item contributes its cached `size_estimate` exactly once.
/// Returns 0 when the two CGUs have no inlined item in common.
|
||||||
|
fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'tcx>) -> usize {
|
||||||
|
// Either order works. We pick the one that involves iterating over fewer
|
||||||
|
// items.
// The `inlined` flag and the size are read from the iterated CGU only; the
// other CGU is merely probed by key.
// NOTE(review): presumably an item's `inlined` flag and size estimate are
// the same in both CGUs, which is what makes the order irrelevant — confirm.
|
||||||
|
let (src_cgu, dst_cgu) =
|
||||||
|
if cgu1.items().len() <= cgu2.items().len() { (cgu1, cgu2) } else { (cgu2, cgu1) };
|
||||||
|

||||||
|
let mut overlap = 0;
|
||||||
|
for (item, data) in src_cgu.items().iter() {
|
||||||
|
// Non-inlined (root) items never contribute to the overlap.
if data.inlined {
|
||||||
|
if dst_cgu.items().contains_key(item) {
|
||||||
|
overlap += data.size_estimate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
overlap
|
||||||
|
}
|
||||||
|
|
||||||
fn internalize_symbols<'tcx>(
|
fn internalize_symbols<'tcx>(
|
||||||
cx: &PartitioningCx<'_, 'tcx>,
|
cx: &PartitioningCx<'_, 'tcx>,
|
||||||
codegen_units: &mut [CodegenUnit<'tcx>],
|
codegen_units: &mut [CodegenUnit<'tcx>],
|
||||||
|
@ -870,19 +934,16 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
|
||||||
all_cgu_sizes.push(cgu.size_estimate());
|
all_cgu_sizes.push(cgu.size_estimate());
|
||||||
|
|
||||||
for (item, data) in cgu.items() {
|
for (item, data) in cgu.items() {
|
||||||
match item.instantiation_mode(tcx) {
|
if !data.inlined {
|
||||||
InstantiationMode::GloballyShared { .. } => {
|
root_items += 1;
|
||||||
root_items += 1;
|
root_size += data.size_estimate;
|
||||||
root_size += data.size_estimate;
|
} else {
|
||||||
}
|
if inlined_items.insert(item) {
|
||||||
InstantiationMode::LocalCopy => {
|
unique_inlined_items += 1;
|
||||||
if inlined_items.insert(item) {
|
unique_inlined_size += data.size_estimate;
|
||||||
unique_inlined_items += 1;
|
|
||||||
unique_inlined_size += data.size_estimate;
|
|
||||||
}
|
|
||||||
placed_inlined_items += 1;
|
|
||||||
placed_inlined_size += data.size_estimate;
|
|
||||||
}
|
}
|
||||||
|
placed_inlined_items += 1;
|
||||||
|
placed_inlined_size += data.size_estimate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -937,10 +998,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
|
||||||
let symbol_name = item.symbol_name(tcx).name;
|
let symbol_name = item.symbol_name(tcx).name;
|
||||||
let symbol_hash_start = symbol_name.rfind('h');
|
let symbol_hash_start = symbol_name.rfind('h');
|
||||||
let symbol_hash = symbol_hash_start.map_or("<no hash>", |i| &symbol_name[i..]);
|
let symbol_hash = symbol_hash_start.map_or("<no hash>", |i| &symbol_name[i..]);
|
||||||
let kind = match item.instantiation_mode(tcx) {
|
let kind = if !data.inlined { "root" } else { "inlined" };
|
||||||
InstantiationMode::GloballyShared { .. } => "root",
|
|
||||||
InstantiationMode::LocalCopy => "inlined",
|
|
||||||
};
|
|
||||||
let size = data.size_estimate;
|
let size = data.size_estimate;
|
||||||
let _ = with_no_trimmed_paths!(writeln!(
|
let _ = with_no_trimmed_paths!(writeln!(
|
||||||
s,
|
s,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue