rust/compiler/rustc_codegen_ssa/src/mir/mod.rs

use crate::base;
use crate::traits::*;
use rustc_errors::ErrorReported;
use rustc_middle::mir;
use rustc_middle::mir::interpret::ErrorHandled;
use rustc_middle::ty::layout::{FnAbiExt, HasTyCtxt, TyAndLayout};
use rustc_middle::ty::{self, Instance, Ty, TypeFoldable};
use rustc_target::abi::call::{FnAbi, PassMode};
use rustc_target::abi::HasDataLayout;
use std::iter;
use rustc_index::bit_set::BitSet;
use rustc_index::vec::IndexVec;
use self::analyze::CleanupKind;
use self::debuginfo::{FunctionDebugContext, PerLocalVarDebugInfo};
use self::place::PlaceRef;
use rustc_middle::mir::traversal;
use self::operand::{OperandRef, OperandValue};
/// Master context for codegenning from MIR.
pub struct FunctionCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> {
instance: Instance<'tcx>,
mir: &'tcx mir::Body<'tcx>,
debug_context: Option<FunctionDebugContext<Bx::DIScope>>,
llfn: Bx::Function,
cx: &'a Bx::CodegenCx,
fn_abi: FnAbi<'tcx, Ty<'tcx>>,
/// When unwinding is initiated, we have to store this personality
/// value somewhere so that we can load it and re-use it in the
/// resume instruction. The personality is (afaik) some kind of
/// value used for C++ unwinding, which must filter by type: we
/// don't really care about it very much. Anyway, this value
/// contains an alloca into which the personality is stored and
/// then later loaded when generating the DIVERGE_BLOCK.
personality_slot: Option<PlaceRef<'tcx, Bx::Value>>,
/// A `Block` for each MIR `BasicBlock`
blocks: IndexVec<mir::BasicBlock, Bx::BasicBlock>,
/// The funclet status of each basic block
cleanup_kinds: IndexVec<mir::BasicBlock, analyze::CleanupKind>,
/// When targeting MSVC, this stores the cleanup info for each funclet
/// BB. This is initialized as we compute the funclets' head block in RPO.
funclets: IndexVec<mir::BasicBlock, Option<Bx::Funclet>>,
/// This stores the landing-pad block for a given BB, computed lazily on GNU
/// and eagerly on MSVC.
landing_pads: IndexVec<mir::BasicBlock, Option<Bx::BasicBlock>>,
/// Cached unreachable block
unreachable_block: Option<Bx::BasicBlock>,
/// The location where each MIR arg/var/tmp/ret is stored. This is
/// usually a `PlaceRef` representing an alloca, but not always:
/// sometimes we can skip the alloca and just store the value
/// directly using an `OperandRef`, which makes for tighter LLVM
/// IR. The conditions for using an `OperandRef` are as follows:
///
/// - the type of the local must be judged "immediate" by `is_llvm_immediate`
/// - the operand must never be referenced indirectly
/// - we should not take its address using the `&` operator
/// - nor should it appear in a place path like `tmp.a`
/// - the operand must be defined by an rvalue that can generate immediate
/// values
///
/// Avoiding allocs can also be important for certain intrinsics,
/// notably `expect`.
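///
/// For example (illustrative only, not code from this crate): in a body
/// like `let a = b + c; f(a)`, the local `a` can stay an `OperandRef`
/// because it is only ever read as a value, whereas `let a = b + c;
/// f(&a)` takes its address and forces `a` into an alloca-backed
/// `PlaceRef`.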
locals: IndexVec<mir::Local, LocalRef<'tcx, Bx::Value>>,
/// All `VarDebugInfo` from the MIR body, partitioned by `Local`.
/// This is `None` if no variable debuginfo/names are needed.
per_local_var_debug_info:
Option<IndexVec<mir::Local, Vec<PerLocalVarDebugInfo<'tcx, Bx::DIVariable>>>>,
/// Caller location propagated if this function has `#[track_caller]`.
caller_location: Option<OperandRef<'tcx, Bx::Value>>,
}
impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
pub fn monomorphize<T>(&self, value: &T) -> T
where
T: Copy + TypeFoldable<'tcx>,
{
debug!("monomorphize: self.instance={:?}", self.instance);
if let Some(substs) = self.instance.substs_for_mir_body() {
self.cx.tcx().subst_and_normalize_erasing_regions(
substs,
ty::ParamEnv::reveal_all(),
&value,
)
} else {
self.cx.tcx().normalize_erasing_regions(ty::ParamEnv::reveal_all(), *value)
}
}
}
enum LocalRef<'tcx, V> {
Place(PlaceRef<'tcx, V>),
/// `UnsizedPlace(p)`: `p` itself is a thin pointer (indirect place).
/// `*p` is the fat pointer that references the actual unsized place.
/// Every time it is initialized, we have to reallocate the place
/// and update the fat pointer. That's the reason why it is indirect.
UnsizedPlace(PlaceRef<'tcx, V>),
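/// The local lives as an SSA operand rather than in memory. `None`
/// means no value has been produced for it yet (non-ZST operand
/// locals start out uninitialized; see `new_operand` below).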
Operand(Option<OperandRef<'tcx, V>>),
}
impl<'a, 'tcx, V: CodegenObject> LocalRef<'tcx, V> {
fn new_operand<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
bx: &mut Bx,
layout: TyAndLayout<'tcx>,
) -> LocalRef<'tcx, V> {
if layout.is_zst() {
// Zero-size temporaries aren't always initialized, which
// doesn't matter because they don't contain data, but
// we need something in the operand.
LocalRef::Operand(Some(OperandRef::new_zst(bx, layout)))
} else {
LocalRef::Operand(None)
}
}
}
///////////////////////////////////////////////////////////////////////////
pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
cx: &'a Bx::CodegenCx,
instance: Instance<'tcx>,
) {
assert!(!instance.substs.needs_infer());
let llfn = cx.get_fn(instance);
let mir = cx.tcx().instance_mir(instance.def);
let fn_abi = FnAbi::of_instance(cx, instance, &[]);
debug!("fn_abi: {:?}", fn_abi);
let debug_context = cx.create_function_debug_context(instance, &fn_abi, llfn, &mir);
let mut bx = Bx::new_block(cx, llfn, "start");
if mir.basic_blocks().iter().any(|bb| bb.is_cleanup) {
bx.set_personality_fn(cx.eh_personality());
}
bx.sideeffect(false);
let cleanup_kinds = analyze::cleanup_kinds(&mir);
// Allocate a `Block` for every basic block. The start block reuses the
// function's entry block, unless something branches back to it.
let reentrant_start_block = !mir.predecessors()[mir::START_BLOCK].is_empty();
let block_bxs: IndexVec<mir::BasicBlock, Bx::BasicBlock> = mir
.basic_blocks()
.indices()
.map(|bb| {
if bb == mir::START_BLOCK && !reentrant_start_block {
bx.llbb()
} else {
bx.build_sibling_block(&format!("{:?}", bb)).llbb()
}
})
.collect();
let (landing_pads, funclets) = create_funclets(&mir, &mut bx, &cleanup_kinds, &block_bxs);
let mut fx = FunctionCx {
instance,
mir,
llfn,
fn_abi,
cx,
personality_slot: None,
blocks: block_bxs,
unreachable_block: None,
cleanup_kinds,
landing_pads,
funclets,
locals: IndexVec::new(),
debug_context,
per_local_var_debug_info: None,
caller_location: None,
};
fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info();
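// Evaluate all consts required by the body up front, so that an error
// in any of them is reported deterministically, even if the point of
// use would never be reached at runtime.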
for const_ in &mir.required_consts {
if let Err(err) = fx.eval_mir_constant(const_) {
match err {
// errored or at least linted
ErrorHandled::Reported(ErrorReported) | ErrorHandled::Linted => {}
ErrorHandled::TooGeneric => {
span_bug!(const_.span, "codegen encountered polymorphic constant: {:?}", err)
}
}
}
}
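// Compute which locals must live in memory (an alloca): roughly,
// those whose address is taken or which the SSA operand form
// otherwise cannot represent.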
let memory_locals = analyze::non_ssa_locals(&fx);
// Allocate variable and temp allocas
fx.locals = {
let args = arg_local_refs(&mut bx, &mut fx, &memory_locals);
let mut allocate_local = |local| {
let decl = &mir.local_decls[local];
let layout = bx.layout_of(fx.monomorphize(&decl.ty));
assert!(!layout.ty.has_erasable_regions());
if local == mir::RETURN_PLACE && fx.fn_abi.ret.is_indirect() {
debug!("alloc: {:?} (return place) -> place", local);
let llretptr = bx.get_param(0);
return LocalRef::Place(PlaceRef::new_sized(llretptr, layout));
}
if memory_locals.contains(local) {
debug!("alloc: {:?} -> place", local);
if layout.is_unsized() {
LocalRef::UnsizedPlace(PlaceRef::alloca_unsized_indirect(&mut bx, layout))
} else {
LocalRef::Place(PlaceRef::alloca(&mut bx, layout))
}
} else {
debug!("alloc: {:?} -> operand", local);
LocalRef::new_operand(&mut bx, layout)
}
};
let retptr = allocate_local(mir::RETURN_PLACE);
iter::once(retptr)
.chain(args.into_iter())
.chain(mir.vars_and_temps_iter().map(allocate_local))
.collect()
};
// Apply debuginfo to the newly allocated locals.
fx.debug_introduce_locals(&mut bx);
// Branch to the START block, if it's not the entry block.
if reentrant_start_block {
bx.br(fx.blocks[mir::START_BLOCK]);
}
let rpo = traversal::reverse_postorder(&mir);
let mut visited = BitSet::new_empty(mir.basic_blocks().len());
// Codegen the body of each block using reverse postorder
for (bb, _) in rpo {
visited.insert(bb.index());
fx.codegen_block(bb);
}
// Delete the blocks that were not visited above: they are unreachable
// from the start block, so nothing was codegenned into them.
for bb in mir.basic_blocks().indices() {
// Unreachable block
if !visited.contains(bb.index()) {
debug!("codegen_mir: block {:?} was not visited", bb);
unsafe {
bx.delete_basic_block(fx.blocks[bb]);
}
}
}
}
fn create_funclets<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
mir: &'tcx mir::Body<'tcx>,
bx: &mut Bx,
cleanup_kinds: &IndexVec<mir::BasicBlock, CleanupKind>,
block_bxs: &IndexVec<mir::BasicBlock, Bx::BasicBlock>,
) -> (
IndexVec<mir::BasicBlock, Option<Bx::BasicBlock>>,
IndexVec<mir::BasicBlock, Option<Bx::Funclet>>,
) {
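// For each basic block, compute an optional (landing-pad block,
// funclet) pair. These are only materialized for funclet head blocks
// when targeting MSVC-style SEH; everywhere else both stay `None`.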
block_bxs
.iter_enumerated()
.zip(cleanup_kinds)
.map(|((bb, &llbb), cleanup_kind)| {
match *cleanup_kind {
CleanupKind::Funclet if base::wants_msvc_seh(bx.sess()) => {}
_ => return (None, None),
}
let funclet;
let ret_llbb;
match mir[bb].terminator.as_ref().map(|t| &t.kind) {
// This is a basic block that we're aborting the program for,
// notably in an `extern` function. These basic blocks are inserted
// so that we assert that `extern` functions do indeed not panic,
// and if they do we abort the process.
//
// On MSVC these are tricky though (where we're doing funclets). If
// we were to do a cleanuppad (like below) the normal functions like
// `longjmp` would trigger the abort logic, terminating the
// program. Instead we insert the equivalent of `catch(...)` for C++
// which magically doesn't trigger when `longjmp` flies over this
// frame.
//
// Lots more discussion can be found on #48251 but this codegen is
// modeled after clang's for:
//
// try {
// foo();
// } catch (...) {
// bar();
// }
Some(&mir::TerminatorKind::Abort) => {
let mut cs_bx = bx.build_sibling_block(&format!("cs_funclet{:?}", bb));
let mut cp_bx = bx.build_sibling_block(&format!("cp_funclet{:?}", bb));
ret_llbb = cs_bx.llbb();
let cs = cs_bx.catch_switch(None, None, 1);
cs_bx.add_handler(cs, cp_bx.llbb());
// The "null" here is actually a RTTI type descriptor for the
// C++ personality function, but `catch (...)` has no type so
// it's null. The 64 here is actually a bitfield which
// represents that this is a catch-all block.
let null = bx.const_null(
bx.type_i8p_ext(bx.cx().data_layout().instruction_address_space),
);
let sixty_four = bx.const_i32(64);
funclet = cp_bx.catch_pad(cs, &[null, sixty_four, null]);
cp_bx.br(llbb);
}
_ => {
let mut cleanup_bx = bx.build_sibling_block(&format!("funclet_{:?}", bb));
ret_llbb = cleanup_bx.llbb();
funclet = cleanup_bx.cleanup_pad(None, &[]);
cleanup_bx.br(llbb);
}
};
(Some(ret_llbb), Some(funclet))
})
.unzip()
}
/// Produces, for each argument, a `LocalRef` for the argument's value:
/// an `OperandRef` when the argument can stay in SSA form, and a
/// `PlaceRef` (an alloca, or the caller's own pointer for indirect
/// arguments) when it must live in memory.
fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
fx: &mut FunctionCx<'a, 'tcx, Bx>,
memory_locals: &BitSet<mir::Local>,
) -> Vec<LocalRef<'tcx, Bx::Value>> {
let mir = fx.mir;
let mut idx = 0;
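// If the return value is passed indirectly, the first LLVM argument
// is the return place pointer, so real arguments start at index 1.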
let mut llarg_idx = fx.fn_abi.ret.is_indirect() as usize;
let args = mir
.args_iter()
.enumerate()
.map(|(arg_index, local)| {
let arg_decl = &mir.local_decls[local];
if Some(local) == mir.spread_arg {
// This argument (e.g., the last argument in the "rust-call" ABI)
// is a tuple that was spread at the ABI level and now we have
// to reconstruct it into a tuple local variable, from multiple
// individual LLVM function arguments.
let arg_ty = fx.monomorphize(&arg_decl.ty);
let tupled_arg_tys = match arg_ty.kind() {
ty::Tuple(tys) => tys,
_ => bug!("spread argument isn't a tuple?!"),
};
let place = PlaceRef::alloca(bx, bx.layout_of(arg_ty));
for i in 0..tupled_arg_tys.len() {
let arg = &fx.fn_abi.args[idx];
idx += 1;
if arg.pad.is_some() {
llarg_idx += 1;
}
let pr_field = place.project_field(bx, i);
bx.store_fn_arg(arg, &mut llarg_idx, pr_field);
}
return LocalRef::Place(place);
}
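// For C-variadic functions, the extra trailing MIR argument (one past
// the declared ABI arguments) is the implicit `VaList`; it has no
// incoming LLVM argument and is materialized with `va_start` instead.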
if fx.fn_abi.c_variadic && arg_index == fx.fn_abi.args.len() {
let arg_ty = fx.monomorphize(&arg_decl.ty);
let va_list = PlaceRef::alloca(bx, bx.layout_of(arg_ty));
bx.va_start(va_list.llval);
return LocalRef::Place(va_list);
}
let arg = &fx.fn_abi.args[idx];
idx += 1;
if arg.pad.is_some() {
llarg_idx += 1;
}
if !memory_locals.contains(local) {
// We don't have to cast or keep the argument in the alloca.
// FIXME(eddyb): We should figure out how to use llvm.dbg.value instead
// of putting everything in allocas just so we can use llvm.dbg.declare.
let local = |op| LocalRef::Operand(Some(op));
match arg.mode {
PassMode::Ignore => {
return local(OperandRef::new_zst(bx, arg.layout));
}
PassMode::Direct(_) => {
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
return local(OperandRef::from_immediate_or_packed_pair(
bx, llarg, arg.layout,
));
}
PassMode::Pair(..) => {
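// Two immediates, e.g. the data pointer and the
// length/vtable half of a fat pointer.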
let (a, b) = (bx.get_param(llarg_idx), bx.get_param(llarg_idx + 1));
llarg_idx += 2;
return local(OperandRef {
val: OperandValue::Pair(a, b),
layout: arg.layout,
});
}
_ => {}
}
}
if arg.is_sized_indirect() {
// Don't copy an indirect argument to an alloca, the caller
// already put it in a temporary alloca and gave it up.
// FIXME: lifetimes
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
} else if arg.is_unsized_indirect() {
// As the storage for the indirect argument lives during
// the whole function call, we just copy the fat pointer.
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
let llextra = bx.get_param(llarg_idx);
llarg_idx += 1;
let indirect_operand = OperandValue::Pair(llarg, llextra);
let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
indirect_operand.store(bx, tmp);
LocalRef::UnsizedPlace(tmp)
} else {
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
}
})
.collect::<Vec<_>>();
if fx.instance.def.requires_caller_location(bx.tcx()) {
assert_eq!(
fx.fn_abi.args.len(),
args.len() + 1,
"#[track_caller] fn's must have 1 more argument in their ABI than in their MIR",
);
let arg = fx.fn_abi.args.last().unwrap();
match arg.mode {
PassMode::Direct(_) => (),
_ => bug!("caller location must be PassMode::Direct, found {:?}", arg.mode),
}
fx.caller_location = Some(OperandRef {
val: OperandValue::Immediate(bx.get_param(llarg_idx)),
layout: arg.layout,
});
}
args
}
mod analyze;
mod block;
pub mod constant;
pub mod coverageinfo;
pub mod debuginfo;
mod intrinsic;
pub mod operand;
pub mod place;
mod rvalue;
mod statement;