Move all cold code to the end of the function
Fixes #836

Benchmark #1: simple-raytracer/raytracer_cg_clif
  Time (mean ± σ):      9.250 s ±  0.056 s    [User: 9.213 s, System: 0.015 s]
  Range (min … max):    9.151 s …  9.348 s    20 runs

Benchmark #2: simple-raytracer/raytracer_cg_clif_cold_separated
  Time (mean ± σ):      9.179 s ±  0.101 s    [User: 9.141 s, System: 0.016 s]
  Range (min … max):    9.070 s …  9.473 s    20 runs

Summary
  'simple-raytracer/raytracer_cg_clif_cold_separated' ran
    1.01 ± 0.01 times faster than 'simple-raytracer/raytracer_cg_clif'
This commit is contained in:
parent
38797f8bad
commit
c74b306037
7 changed files with 56 additions and 4 deletions
|
@ -506,6 +506,7 @@ fn codegen_call_inner<'tcx>(
|
|||
args: Vec<CValue<'tcx>>,
|
||||
ret_place: Option<CPlace<'tcx>>,
|
||||
) {
|
||||
// FIXME mark the current ebb as cold when calling a `#[cold]` function.
|
||||
let fn_sig = fx
|
||||
.tcx
|
||||
.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx));
|
||||
|
|
15
src/base.rs
15
src/base.rs
|
@ -32,6 +32,12 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
|||
// Predefine ebb's
|
||||
let start_ebb = bcx.create_ebb();
|
||||
let ebb_map: IndexVec<BasicBlock, Ebb> = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect();
|
||||
let mut cold_ebbs = EntitySet::new();
|
||||
for (bb, &ebb) in ebb_map.iter_enumerated() {
|
||||
if mir.basic_blocks()[bb].is_cleanup {
|
||||
cold_ebbs.insert(ebb);
|
||||
}
|
||||
}
|
||||
|
||||
// Make FunctionCx
|
||||
let pointer_type = cx.module.target_config().pointer_type();
|
||||
|
@ -49,6 +55,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
|||
ebb_map,
|
||||
local_map: HashMap::new(),
|
||||
caller_location: None, // set by `codegen_fn_prelude`
|
||||
cold_ebbs,
|
||||
|
||||
clif_comments,
|
||||
constants_cx: &mut cx.constants_cx,
|
||||
|
@ -73,6 +80,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
|||
let mut clif_comments = fx.clif_comments;
|
||||
let source_info_set = fx.source_info_set;
|
||||
let local_map = fx.local_map;
|
||||
let cold_ebbs = fx.cold_ebbs;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None);
|
||||
|
@ -82,7 +90,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
|||
|
||||
// Perform rust specific optimizations
|
||||
tcx.sess.time("optimize clif ir", || {
|
||||
crate::optimize::optimize_function(tcx, instance, context, &mut clif_comments);
|
||||
crate::optimize::optimize_function(tcx, instance, context, &cold_ebbs, &mut clif_comments);
|
||||
});
|
||||
|
||||
// Define function
|
||||
|
@ -191,8 +199,11 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
|
|||
}
|
||||
}
|
||||
let cond = trans_operand(fx, cond).load_scalar(fx);
|
||||
|
||||
let target = fx.get_ebb(*target);
|
||||
let failure = fx.bcx.create_ebb();
|
||||
fx.cold_ebbs.insert(failure);
|
||||
|
||||
if *expected {
|
||||
fx.bcx.ins().brz(cond, failure, &[]);
|
||||
} else {
|
||||
|
@ -200,8 +211,6 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
|
|||
};
|
||||
fx.bcx.ins().jump(target, &[]);
|
||||
|
||||
// FIXME insert bb after all other bb's to reduce the amount of jumps in the common
|
||||
// case and improve code locality.
|
||||
fx.bcx.switch_to_block(failure);
|
||||
trap_panic(
|
||||
fx,
|
||||
|
|
|
@ -270,6 +270,9 @@ pub struct FunctionCx<'clif, 'tcx, B: Backend + 'static> {
|
|||
/// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
|
||||
pub caller_location: Option<CValue<'tcx>>,
|
||||
|
||||
/// See [crate::optimize::code_layout] for more information.
|
||||
pub cold_ebbs: EntitySet<Ebb>,
|
||||
|
||||
pub clif_comments: crate::pretty_clif::CommentWriter,
|
||||
pub constants_cx: &'clif mut crate::constant::ConstantCx,
|
||||
pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), DataId>,
|
||||
|
|
|
@ -95,6 +95,7 @@ mod prelude {
|
|||
pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind};
|
||||
|
||||
pub use cranelift_codegen::Context;
|
||||
pub use cranelift_codegen::entity::EntitySet;
|
||||
pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value};
|
||||
pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
|
||||
pub use cranelift_codegen::ir::function::Function;
|
||||
|
|
34
src/optimize/code_layout.rs
Normal file
34
src/optimize/code_layout.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
//! This optimization moves cold code to the end of the function.
|
||||
//!
|
||||
//! Some code is executed much less often than other code, for example panicking or the
|
||||
//! landing pads for unwinding. By moving this cold code to the end of the function, the average
|
||||
//! number of jumps is reduced and the code locality is improved.
|
||||
//!
|
||||
//! # Undefined behaviour
|
||||
//!
|
||||
//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
pub fn optimize_function(ctx: &mut Context, cold_ebbs: &EntitySet<Ebb>) {
|
||||
// FIXME Move the ebb in place instead of remove and append once
|
||||
// bytecodealliance/cranelift#1339 is implemented.
|
||||
|
||||
let mut ebb_insts = HashMap::new();
|
||||
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
|
||||
let insts = ctx.func.layout.ebb_insts(ebb).collect::<Vec<_>>();
|
||||
for &inst in &insts {
|
||||
ctx.func.layout.remove_inst(inst);
|
||||
}
|
||||
ebb_insts.insert(ebb, insts);
|
||||
ctx.func.layout.remove_ebb(ebb);
|
||||
}
|
||||
|
||||
// And then append them at the back again.
|
||||
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
|
||||
ctx.func.layout.append_ebb(ebb);
|
||||
for inst in ebb_insts.remove(&ebb).unwrap() {
|
||||
ctx.func.layout.append_inst(inst, ebb);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,13 +1,18 @@
|
|||
use crate::prelude::*;
|
||||
|
||||
mod code_layout;
|
||||
mod stack2reg;
|
||||
|
||||
pub fn optimize_function<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
instance: Instance<'tcx>,
|
||||
ctx: &mut Context,
|
||||
cold_ebbs: &EntitySet<Ebb>,
|
||||
clif_comments: &mut crate::pretty_clif::CommentWriter,
|
||||
) {
|
||||
// The code_layout optimization is very cheap.
|
||||
self::code_layout::optimize_function(ctx, cold_ebbs);
|
||||
|
||||
if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
|
||||
return; // FIXME classify optimizations over opt levels
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@ use std::collections::{BTreeMap, HashSet};
|
|||
use std::ops::Not;
|
||||
|
||||
use cranelift_codegen::cursor::{Cursor, FuncCursor};
|
||||
use cranelift_codegen::entity::EntitySet;
|
||||
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
|
||||
use cranelift_codegen::ir::immediates::Offset32;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue