Remove my `scalar_copy_backend_type` optimization attempt
I added this back in #111999, but I no longer think it's a good idea:

- It had to get scaled back to only power-of-two things to not break a bunch of targets.
- LLVM seems to be getting better at memcpy removal anyway.
- Introducing vector instructions has seemed to sometimes (#115515) make autovectorization worse.

So this removes it from the codegen crates entirely, and just tries to use <https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/traits/builder/trait.BuilderMethods.html#method.typed_place_copy> instead of direct `memcpy`, so things will still use load/store for immediates.
This commit is contained in:
parent
ff24ef91fc
commit
b5376ba601
10 changed files with 60 additions and 154 deletions
|
@ -12,7 +12,7 @@ use crate::mir;
|
|||
use crate::mir::operand::OperandValue;
|
||||
use crate::mir::place::PlaceRef;
|
||||
use crate::traits::*;
|
||||
use crate::{CachedModuleCodegen, CompiledModule, CrateInfo, MemFlags, ModuleCodegen, ModuleKind};
|
||||
use crate::{CachedModuleCodegen, CompiledModule, CrateInfo, ModuleCodegen, ModuleKind};
|
||||
|
||||
use rustc_ast::expand::allocator::{global_fn_name, AllocatorKind, ALLOCATOR_METHODS};
|
||||
use rustc_attr as attr;
|
||||
|
@ -37,7 +37,7 @@ use rustc_session::config::{self, CrateType, EntryFnType, OptLevel, OutputType};
|
|||
use rustc_session::Session;
|
||||
use rustc_span::symbol::sym;
|
||||
use rustc_span::Symbol;
|
||||
use rustc_target::abi::{Align, FIRST_VARIANT};
|
||||
use rustc_target::abi::FIRST_VARIANT;
|
||||
|
||||
use std::cmp;
|
||||
use std::collections::BTreeSet;
|
||||
|
@ -282,15 +282,7 @@ pub fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
|||
}
|
||||
|
||||
if src_f.layout.ty == dst_f.layout.ty {
|
||||
memcpy_ty(
|
||||
bx,
|
||||
dst_f.llval,
|
||||
dst_f.align,
|
||||
src_f.llval,
|
||||
src_f.align,
|
||||
src_f.layout,
|
||||
MemFlags::empty(),
|
||||
);
|
||||
bx.typed_place_copy(dst_f, src_f);
|
||||
} else {
|
||||
coerce_unsized_into(bx, src_f, dst_f);
|
||||
}
|
||||
|
@ -382,30 +374,6 @@ pub fn wants_new_eh_instructions(sess: &Session) -> bool {
|
|||
wants_wasm_eh(sess) || wants_msvc_seh(sess)
|
||||
}
|
||||
|
||||
pub fn memcpy_ty<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
||||
bx: &mut Bx,
|
||||
dst: Bx::Value,
|
||||
dst_align: Align,
|
||||
src: Bx::Value,
|
||||
src_align: Align,
|
||||
layout: TyAndLayout<'tcx>,
|
||||
flags: MemFlags,
|
||||
) {
|
||||
let size = layout.size.bytes();
|
||||
if size == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
if flags == MemFlags::empty()
|
||||
&& let Some(bty) = bx.cx().scalar_copy_backend_type(layout)
|
||||
{
|
||||
let temp = bx.load(bty, src, src_align);
|
||||
bx.store(temp, dst, dst_align);
|
||||
} else {
|
||||
bx.memcpy(dst, dst_align, src, src_align, bx.cx().const_usize(size), flags);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn codegen_instance<'a, 'tcx: 'a, Bx: BuilderMethods<'a, 'tcx>>(
|
||||
cx: &'a Bx::CodegenCx,
|
||||
instance: Instance<'tcx>,
|
||||
|
|
|
@ -1459,7 +1459,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
|
|||
}
|
||||
_ => (op.immediate_or_packed_pair(bx), arg.layout.align.abi, false),
|
||||
},
|
||||
Ref(llval, _, align) => match arg.mode {
|
||||
Ref(llval, llextra, align) => match arg.mode {
|
||||
PassMode::Indirect { attrs, .. } => {
|
||||
let required_align = match attrs.pointee_align {
|
||||
Some(pointee_align) => cmp::max(pointee_align, arg.layout.align.abi),
|
||||
|
@ -1470,15 +1470,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
|
|||
// alignment requirements may be higher than the type's alignment, so copy
|
||||
// to a higher-aligned alloca.
|
||||
let scratch = PlaceRef::alloca_aligned(bx, arg.layout, required_align);
|
||||
base::memcpy_ty(
|
||||
bx,
|
||||
scratch.llval,
|
||||
scratch.align,
|
||||
llval,
|
||||
align,
|
||||
op.layout,
|
||||
MemFlags::empty(),
|
||||
);
|
||||
let op_place = PlaceRef { llval, llextra, layout: op.layout, align };
|
||||
bx.typed_place_copy(scratch, op_place);
|
||||
(scratch.llval, scratch.align, true)
|
||||
} else {
|
||||
(llval, align, true)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
use super::place::PlaceRef;
|
||||
use super::{FunctionCx, LocalRef};
|
||||
|
||||
use crate::base;
|
||||
use crate::size_of_val;
|
||||
use crate::traits::*;
|
||||
use crate::MemFlags;
|
||||
|
@ -398,7 +397,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandValue<V> {
|
|||
self.store_with_flags(bx, dest, MemFlags::NONTEMPORAL);
|
||||
}
|
||||
|
||||
fn store_with_flags<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
|
||||
pub(crate) fn store_with_flags<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
|
||||
self,
|
||||
bx: &mut Bx,
|
||||
dest: PlaceRef<'tcx, V>,
|
||||
|
@ -410,16 +409,11 @@ impl<'a, 'tcx, V: CodegenObject> OperandValue<V> {
|
|||
// Avoid generating stores of zero-sized values, because the only way to have a zero-sized
|
||||
// value is through `undef`/`poison`, and the store itself is useless.
|
||||
}
|
||||
OperandValue::Ref(r, None, source_align) => {
|
||||
OperandValue::Ref(llval, llextra @ None, source_align) => {
|
||||
assert!(dest.layout.is_sized(), "cannot directly store unsized values");
|
||||
if flags.contains(MemFlags::NONTEMPORAL) {
|
||||
// HACK(nox): This is inefficient but there is no nontemporal memcpy.
|
||||
let ty = bx.backend_type(dest.layout);
|
||||
let val = bx.load(ty, r, source_align);
|
||||
bx.store_with_flags(val, dest.llval, dest.align, flags);
|
||||
return;
|
||||
}
|
||||
base::memcpy_ty(bx, dest.llval, dest.align, r, source_align, dest.layout, flags)
|
||||
let source_place =
|
||||
PlaceRef { llval, llextra, align: source_align, layout: dest.layout };
|
||||
bx.typed_place_copy_with_flags(dest, source_place, flags);
|
||||
}
|
||||
OperandValue::Ref(_, Some(_), _) => {
|
||||
bug!("cannot directly store unsized values");
|
||||
|
|
|
@ -281,17 +281,31 @@ pub trait BuilderMethods<'a, 'tcx>:
|
|||
dst: PlaceRef<'tcx, Self::Value>,
|
||||
src: PlaceRef<'tcx, Self::Value>,
|
||||
) {
|
||||
debug_assert!(src.llextra.is_none());
|
||||
debug_assert!(dst.llextra.is_none());
|
||||
self.typed_place_copy_with_flags(dst, src, MemFlags::empty());
|
||||
}
|
||||
|
||||
fn typed_place_copy_with_flags(
|
||||
&mut self,
|
||||
dst: PlaceRef<'tcx, Self::Value>,
|
||||
src: PlaceRef<'tcx, Self::Value>,
|
||||
flags: MemFlags,
|
||||
) {
|
||||
debug_assert!(src.llextra.is_none(), "cannot directly copy from unsized values");
|
||||
debug_assert!(dst.llextra.is_none(), "cannot directly copy into unsized values");
|
||||
debug_assert_eq!(dst.layout.size, src.layout.size);
|
||||
if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
|
||||
// If we're not optimizing, the aliasing information from `memcpy`
|
||||
// isn't useful, so just load-store the value for smaller code.
|
||||
let temp = self.load_operand(src);
|
||||
temp.val.store(self, dst);
|
||||
temp.val.store_with_flags(self, dst, flags);
|
||||
} else if flags.contains(MemFlags::NONTEMPORAL) {
|
||||
// HACK(nox): This is inefficient but there is no nontemporal memcpy.
|
||||
let ty = self.backend_type(dst.layout);
|
||||
let val = self.load(ty, src.llval, src.align);
|
||||
self.store_with_flags(val, dst.llval, dst.align, flags);
|
||||
} else if !dst.layout.is_zst() {
|
||||
let bytes = self.const_usize(dst.layout.size.bytes());
|
||||
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
|
||||
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -133,28 +133,6 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
|
|||
|| self.is_backend_immediate(layout)
|
||||
|| self.is_backend_scalar_pair(layout))
|
||||
}
|
||||
|
||||
/// A type that can be used in a [`super::BuilderMethods::load`] +
|
||||
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
|
||||
/// such as a MIR `*_0 = *_1`.
|
||||
///
|
||||
/// It's always legal to return `None` here, as the provided impl does,
|
||||
/// in which case callers should use [`super::BuilderMethods::memcpy`]
|
||||
/// instead of the `load`+`store` pair.
|
||||
///
|
||||
/// This can be helpful for things like arrays, where the LLVM backend type
|
||||
/// `[3 x i16]` optimizes to three separate loads and stores, but it can
|
||||
/// instead be copied via an `i48` that stays as the single `load`+`store`.
|
||||
/// (As of 2023-05 LLVM cannot necessarily optimize away a `memcpy` in these
|
||||
/// cases, due to `poison` handling, but in codegen we have more information
|
||||
/// about the type invariants, so can emit something better instead.)
|
||||
///
|
||||
/// This *should* return `None` for particularly-large types, where leaving
|
||||
/// the `memcpy` may well be important to avoid code size explosion.
|
||||
fn scalar_copy_backend_type(&self, layout: TyAndLayout<'tcx>) -> Option<Self::Type> {
|
||||
let _ = layout;
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// For backends that support CFI using type membership (i.e., testing whether a given pointer is
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue