Move all cold code to the end of the function
Fixes #836 Benchmark #1: simple-raytracer/raytracer_cg_clif Time (mean ± σ): 9.250 s ± 0.056 s [User: 9.213 s, System: 0.015 s] Range (min … max): 9.151 s … 9.348 s 20 runs Benchmark #2: simple-raytracer/raytracer_cg_clif_cold_separated Time (mean ± σ): 9.179 s ± 0.101 s [User: 9.141 s, System: 0.016 s] Range (min … max): 9.070 s … 9.473 s 20 runs Summary 'simple-raytracer/raytracer_cg_clif_cold_separated' ran 1.01 ± 0.01 times faster than 'simple-raytracer/raytracer_cg_clif'
This commit is contained in:
parent
38797f8bad
commit
c74b306037
7 changed files with 56 additions and 4 deletions
|
@ -506,6 +506,7 @@ fn codegen_call_inner<'tcx>(
|
||||||
args: Vec<CValue<'tcx>>,
|
args: Vec<CValue<'tcx>>,
|
||||||
ret_place: Option<CPlace<'tcx>>,
|
ret_place: Option<CPlace<'tcx>>,
|
||||||
) {
|
) {
|
||||||
|
// FIXME mark the current ebb as cold when calling a `#[cold]` function.
|
||||||
let fn_sig = fx
|
let fn_sig = fx
|
||||||
.tcx
|
.tcx
|
||||||
.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx));
|
.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx));
|
||||||
|
|
15
src/base.rs
15
src/base.rs
|
@ -32,6 +32,12 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
||||||
// Predefine ebb's
|
// Predefine ebb's
|
||||||
let start_ebb = bcx.create_ebb();
|
let start_ebb = bcx.create_ebb();
|
||||||
let ebb_map: IndexVec<BasicBlock, Ebb> = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect();
|
let ebb_map: IndexVec<BasicBlock, Ebb> = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect();
|
||||||
|
let mut cold_ebbs = EntitySet::new();
|
||||||
|
for (bb, &ebb) in ebb_map.iter_enumerated() {
|
||||||
|
if mir.basic_blocks()[bb].is_cleanup {
|
||||||
|
cold_ebbs.insert(ebb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Make FunctionCx
|
// Make FunctionCx
|
||||||
let pointer_type = cx.module.target_config().pointer_type();
|
let pointer_type = cx.module.target_config().pointer_type();
|
||||||
|
@ -49,6 +55,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
||||||
ebb_map,
|
ebb_map,
|
||||||
local_map: HashMap::new(),
|
local_map: HashMap::new(),
|
||||||
caller_location: None, // set by `codegen_fn_prelude`
|
caller_location: None, // set by `codegen_fn_prelude`
|
||||||
|
cold_ebbs,
|
||||||
|
|
||||||
clif_comments,
|
clif_comments,
|
||||||
constants_cx: &mut cx.constants_cx,
|
constants_cx: &mut cx.constants_cx,
|
||||||
|
@ -73,6 +80,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
||||||
let mut clif_comments = fx.clif_comments;
|
let mut clif_comments = fx.clif_comments;
|
||||||
let source_info_set = fx.source_info_set;
|
let source_info_set = fx.source_info_set;
|
||||||
let local_map = fx.local_map;
|
let local_map = fx.local_map;
|
||||||
|
let cold_ebbs = fx.cold_ebbs;
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None);
|
crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None);
|
||||||
|
@ -82,7 +90,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
|
||||||
|
|
||||||
// Perform rust specific optimizations
|
// Perform rust specific optimizations
|
||||||
tcx.sess.time("optimize clif ir", || {
|
tcx.sess.time("optimize clif ir", || {
|
||||||
crate::optimize::optimize_function(tcx, instance, context, &mut clif_comments);
|
crate::optimize::optimize_function(tcx, instance, context, &cold_ebbs, &mut clif_comments);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Define function
|
// Define function
|
||||||
|
@ -191,8 +199,11 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let cond = trans_operand(fx, cond).load_scalar(fx);
|
let cond = trans_operand(fx, cond).load_scalar(fx);
|
||||||
|
|
||||||
let target = fx.get_ebb(*target);
|
let target = fx.get_ebb(*target);
|
||||||
let failure = fx.bcx.create_ebb();
|
let failure = fx.bcx.create_ebb();
|
||||||
|
fx.cold_ebbs.insert(failure);
|
||||||
|
|
||||||
if *expected {
|
if *expected {
|
||||||
fx.bcx.ins().brz(cond, failure, &[]);
|
fx.bcx.ins().brz(cond, failure, &[]);
|
||||||
} else {
|
} else {
|
||||||
|
@ -200,8 +211,6 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
|
||||||
};
|
};
|
||||||
fx.bcx.ins().jump(target, &[]);
|
fx.bcx.ins().jump(target, &[]);
|
||||||
|
|
||||||
// FIXME insert bb after all other bb's to reduce the amount of jumps in the common
|
|
||||||
// case and improve code locality.
|
|
||||||
fx.bcx.switch_to_block(failure);
|
fx.bcx.switch_to_block(failure);
|
||||||
trap_panic(
|
trap_panic(
|
||||||
fx,
|
fx,
|
||||||
|
|
|
@ -270,6 +270,9 @@ pub struct FunctionCx<'clif, 'tcx, B: Backend + 'static> {
|
||||||
/// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
|
/// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
|
||||||
pub caller_location: Option<CValue<'tcx>>,
|
pub caller_location: Option<CValue<'tcx>>,
|
||||||
|
|
||||||
|
/// See [crate::optimize::code_layout] for more information.
|
||||||
|
pub cold_ebbs: EntitySet<Ebb>,
|
||||||
|
|
||||||
pub clif_comments: crate::pretty_clif::CommentWriter,
|
pub clif_comments: crate::pretty_clif::CommentWriter,
|
||||||
pub constants_cx: &'clif mut crate::constant::ConstantCx,
|
pub constants_cx: &'clif mut crate::constant::ConstantCx,
|
||||||
pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), DataId>,
|
pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), DataId>,
|
||||||
|
|
|
@ -95,6 +95,7 @@ mod prelude {
|
||||||
pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind};
|
pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind};
|
||||||
|
|
||||||
pub use cranelift_codegen::Context;
|
pub use cranelift_codegen::Context;
|
||||||
|
pub use cranelift_codegen::entity::EntitySet;
|
||||||
pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value};
|
pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value};
|
||||||
pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
|
pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
|
||||||
pub use cranelift_codegen::ir::function::Function;
|
pub use cranelift_codegen::ir::function::Function;
|
||||||
|
|
34
src/optimize/code_layout.rs
Normal file
34
src/optimize/code_layout.rs
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
//! This optimization moves cold code to the end of the function.
|
||||||
|
//!
|
||||||
|
//! Some code is executed much less often than other code. For example panicking or the
|
||||||
|
//! landingpads for unwinding. By moving this cold code to the end of the function the average
|
||||||
|
//! amount of jumps is reduced and the code locality is improved.
|
||||||
|
//!
|
||||||
|
//! # Undefined behaviour
|
||||||
|
//!
|
||||||
|
//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself.
|
||||||
|
|
||||||
|
use crate::prelude::*;
|
||||||
|
|
||||||
|
pub fn optimize_function(ctx: &mut Context, cold_ebbs: &EntitySet<Ebb>) {
|
||||||
|
// FIXME Move the ebb in place instead of remove and append once
|
||||||
|
// bytecodealliance/cranelift#1339 is implemented.
|
||||||
|
|
||||||
|
let mut ebb_insts = HashMap::new();
|
||||||
|
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
|
||||||
|
let insts = ctx.func.layout.ebb_insts(ebb).collect::<Vec<_>>();
|
||||||
|
for &inst in &insts {
|
||||||
|
ctx.func.layout.remove_inst(inst);
|
||||||
|
}
|
||||||
|
ebb_insts.insert(ebb, insts);
|
||||||
|
ctx.func.layout.remove_ebb(ebb);
|
||||||
|
}
|
||||||
|
|
||||||
|
// And then append them at the back again.
|
||||||
|
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
|
||||||
|
ctx.func.layout.append_ebb(ebb);
|
||||||
|
for inst in ebb_insts.remove(&ebb).unwrap() {
|
||||||
|
ctx.func.layout.append_inst(inst, ebb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,13 +1,18 @@
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
|
|
||||||
|
mod code_layout;
|
||||||
mod stack2reg;
|
mod stack2reg;
|
||||||
|
|
||||||
pub fn optimize_function<'tcx>(
|
pub fn optimize_function<'tcx>(
|
||||||
tcx: TyCtxt<'tcx>,
|
tcx: TyCtxt<'tcx>,
|
||||||
instance: Instance<'tcx>,
|
instance: Instance<'tcx>,
|
||||||
ctx: &mut Context,
|
ctx: &mut Context,
|
||||||
|
cold_ebbs: &EntitySet<Ebb>,
|
||||||
clif_comments: &mut crate::pretty_clif::CommentWriter,
|
clif_comments: &mut crate::pretty_clif::CommentWriter,
|
||||||
) {
|
) {
|
||||||
|
// The code_layout optimization is very cheap.
|
||||||
|
self::code_layout::optimize_function(ctx, cold_ebbs);
|
||||||
|
|
||||||
if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
|
if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
|
||||||
return; // FIXME classify optimizations over opt levels
|
return; // FIXME classify optimizations over opt levels
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,6 @@ use std::collections::{BTreeMap, HashSet};
|
||||||
use std::ops::Not;
|
use std::ops::Not;
|
||||||
|
|
||||||
use cranelift_codegen::cursor::{Cursor, FuncCursor};
|
use cranelift_codegen::cursor::{Cursor, FuncCursor};
|
||||||
use cranelift_codegen::entity::EntitySet;
|
|
||||||
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
|
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
|
||||||
use cranelift_codegen::ir::immediates::Offset32;
|
use cranelift_codegen::ir::immediates::Offset32;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue