1
Fork 0

Move all cold code to the end of the function

Fixes #836

Benchmark #1: simple-raytracer/raytracer_cg_clif
  Time (mean ± σ):      9.250 s ±  0.056 s    [User: 9.213 s, System: 0.015 s]
  Range (min … max):    9.151 s …  9.348 s    20 runs

Benchmark #2: simple-raytracer/raytracer_cg_clif_cold_separated
  Time (mean ± σ):      9.179 s ±  0.101 s    [User: 9.141 s, System: 0.016 s]
  Range (min … max):    9.070 s …  9.473 s    20 runs

Summary
  'simple-raytracer/raytracer_cg_clif_cold_separated' ran
    1.01 ± 0.01 times faster than 'simple-raytracer/raytracer_cg_clif'
This commit is contained in:
bjorn3 2020-01-11 17:57:18 +01:00
parent 38797f8bad
commit c74b306037
7 changed files with 56 additions and 4 deletions

View file

@ -506,6 +506,7 @@ fn codegen_call_inner<'tcx>(
args: Vec<CValue<'tcx>>,
ret_place: Option<CPlace<'tcx>>,
) {
// FIXME mark the current ebb as cold when calling a `#[cold]` function.
let fn_sig = fx
.tcx
.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx));

View file

@ -32,6 +32,12 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
// Predefine ebb's
let start_ebb = bcx.create_ebb();
let ebb_map: IndexVec<BasicBlock, Ebb> = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect();
let mut cold_ebbs = EntitySet::new();
for (bb, &ebb) in ebb_map.iter_enumerated() {
if mir.basic_blocks()[bb].is_cleanup {
cold_ebbs.insert(ebb);
}
}
// Make FunctionCx
let pointer_type = cx.module.target_config().pointer_type();
@ -49,6 +55,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
ebb_map,
local_map: HashMap::new(),
caller_location: None, // set by `codegen_fn_prelude`
cold_ebbs,
clif_comments,
constants_cx: &mut cx.constants_cx,
@ -73,6 +80,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
let mut clif_comments = fx.clif_comments;
let source_info_set = fx.source_info_set;
let local_map = fx.local_map;
let cold_ebbs = fx.cold_ebbs;
#[cfg(debug_assertions)]
crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None);
@ -82,7 +90,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
// Perform rust specific optimizations
tcx.sess.time("optimize clif ir", || {
crate::optimize::optimize_function(tcx, instance, context, &mut clif_comments);
crate::optimize::optimize_function(tcx, instance, context, &cold_ebbs, &mut clif_comments);
});
// Define function
@ -191,8 +199,11 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
}
}
let cond = trans_operand(fx, cond).load_scalar(fx);
let target = fx.get_ebb(*target);
let failure = fx.bcx.create_ebb();
fx.cold_ebbs.insert(failure);
if *expected {
fx.bcx.ins().brz(cond, failure, &[]);
} else {
@ -200,8 +211,6 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
};
fx.bcx.ins().jump(target, &[]);
// FIXME insert bb after all other bb's to reduce the amount of jumps in the common
// case and improve code locality.
fx.bcx.switch_to_block(failure);
trap_panic(
fx,

View file

@ -270,6 +270,9 @@ pub struct FunctionCx<'clif, 'tcx, B: Backend + 'static> {
/// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
pub caller_location: Option<CValue<'tcx>>,
/// See [crate::optimize::code_layout] for more information.
pub cold_ebbs: EntitySet<Ebb>,
pub clif_comments: crate::pretty_clif::CommentWriter,
pub constants_cx: &'clif mut crate::constant::ConstantCx,
pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), DataId>,

View file

@ -95,6 +95,7 @@ mod prelude {
pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind};
pub use cranelift_codegen::Context;
pub use cranelift_codegen::entity::EntitySet;
pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value};
pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
pub use cranelift_codegen::ir::function::Function;

View file

@ -0,0 +1,34 @@
//! This optimization moves cold code to the end of the function.
//!
//! Some code is executed much less often than other code. For example panicking or the
//! landingpads for unwinding. By moving this cold code to the end of the function the average
//! amount of jumps is reduced and the code locality is improved.
//!
//! # Undefined behaviour
//!
//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself.
use crate::prelude::*;
pub fn optimize_function(ctx: &mut Context, cold_ebbs: &EntitySet<Ebb>) {
// FIXME Move the ebb in place instead of remove and append once
// bytecodealliance/cranelift#1339 is implemented.
let mut ebb_insts = HashMap::new();
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
let insts = ctx.func.layout.ebb_insts(ebb).collect::<Vec<_>>();
for &inst in &insts {
ctx.func.layout.remove_inst(inst);
}
ebb_insts.insert(ebb, insts);
ctx.func.layout.remove_ebb(ebb);
}
// And then append them at the back again.
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
ctx.func.layout.append_ebb(ebb);
for inst in ebb_insts.remove(&ebb).unwrap() {
ctx.func.layout.append_inst(inst, ebb);
}
}
}

View file

@ -1,13 +1,18 @@
use crate::prelude::*;
mod code_layout;
mod stack2reg;
pub fn optimize_function<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
ctx: &mut Context,
cold_ebbs: &EntitySet<Ebb>,
clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
// The code_layout optimization is very cheap.
self::code_layout::optimize_function(ctx, cold_ebbs);
if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
return; // FIXME classify optimizations over opt levels
}

View file

@ -13,7 +13,6 @@ use std::collections::{BTreeMap, HashSet};
use std::ops::Not;
use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::entity::EntitySet;
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
use cranelift_codegen::ir::immediates::Offset32;