
Merge commit '05677b6bd6' into sync_cg_clif-2021-08-06

bjorn3 committed 2021-08-06 16:26:56 +02:00 (commit 279f486960)
40 changed files with 823 additions and 590 deletions


@@ -9,13 +9,12 @@ use rustc_middle::ty::layout::FnAbiExt;
 use rustc_target::abi::call::{Conv, FnAbi};
 use rustc_target::spec::abi::Abi;

-use cranelift_codegen::ir::AbiParam;
-use smallvec::smallvec;
+use cranelift_codegen::ir::{AbiParam, SigRef};

 use self::pass_mode::*;
 use crate::prelude::*;

-pub(crate) use self::returning::{can_return_to_ssa_var, codegen_return};
+pub(crate) use self::returning::codegen_return;

 fn clif_sig_from_fn_abi<'tcx>(
     tcx: TyCtxt<'tcx>,
@@ -236,27 +235,20 @@ pub(crate) fn codegen_fn_prelude<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, start_
     // not mutated by the current function, this is necessary to support unsized arguments.
     if let ArgKind::Normal(Some(val)) = arg_kind {
         if let Some((addr, meta)) = val.try_to_ptr() {
-            let local_decl = &fx.mir.local_decls[local];
-            //                       v this ! is important
-            let internally_mutable = !val
-                .layout()
-                .ty
-                .is_freeze(fx.tcx.at(local_decl.source_info.span), ParamEnv::reveal_all());
-            if local_decl.mutability == mir::Mutability::Not && !internally_mutable {
-                // We wont mutate this argument, so it is fine to borrow the backing storage
-                // of this argument, to prevent a copy.
+            // Ownership of the value at the backing storage for an argument is passed to the
+            // callee per the ABI, so it is fine to borrow the backing storage of this argument
+            // to prevent a copy.

-                let place = if let Some(meta) = meta {
-                    CPlace::for_ptr_with_extra(addr, meta, val.layout())
-                } else {
-                    CPlace::for_ptr(addr, val.layout())
-                };
+            let place = if let Some(meta) = meta {
+                CPlace::for_ptr_with_extra(addr, meta, val.layout())
+            } else {
+                CPlace::for_ptr(addr, val.layout())
+            };

-                self::comments::add_local_place_comments(fx, place, local);
+            self::comments::add_local_place_comments(fx, place, local);

-                assert_eq!(fx.local_map.push(place), local);
-                continue;
-            }
+            assert_eq!(fx.local_map.push(place), local);
+            continue;
         }
     }
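Note: the new comment above states the invariant this commit relies on: per the ABI, ownership of an indirectly passed argument's backing storage moves to the callee, so the callee may use that storage directly no matter how the caller bound the value. A minimal standalone sketch of the same idea in surface Rust (illustrative, not part of the diff):

```rust
// The callee receives the argument by value: it owns the backing storage and
// may mutate it freely, even though the caller's binding was immutable.
fn callee(mut v: Vec<u8>) -> usize {
    v.push(4); // fine: the storage now belongs to the callee
    v.len()
}

fn main() {
    let arg = vec![1, 2, 3]; // immutable binding in the caller
    assert_eq!(callee(arg), 4); // ownership moves into the call
}
```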
@@ -292,6 +284,22 @@ pub(crate) fn codegen_fn_prelude<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, start_
     fx.bcx.ins().jump(*fx.block_map.get(START_BLOCK).unwrap(), &[]);
 }

+struct CallArgument<'tcx> {
+    value: CValue<'tcx>,
+    is_owned: bool,
+}
+
+// FIXME avoid intermediate `CValue` before calling `adjust_arg_for_abi`
+fn codegen_call_argument_operand<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    operand: &Operand<'tcx>,
+) -> CallArgument<'tcx> {
+    CallArgument {
+        value: codegen_operand(fx, operand),
+        is_owned: matches!(operand, Operand::Move(_)),
+    }
+}
+
 pub(crate) fn codegen_terminator_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     span: Span,
@@ -360,12 +368,12 @@ pub(crate) fn codegen_terminator_call<'tcx>(
     }

     // Unpack arguments tuple for closures
-    let args = if fn_sig.abi == Abi::RustCall {
+    let mut args = if fn_sig.abi == Abi::RustCall {
         assert_eq!(args.len(), 2, "rust-call abi requires two arguments");
-        let self_arg = codegen_operand(fx, &args[0]);
-        let pack_arg = codegen_operand(fx, &args[1]);
+        let self_arg = codegen_call_argument_operand(fx, &args[0]);
+        let pack_arg = codegen_call_argument_operand(fx, &args[1]);

-        let tupled_arguments = match pack_arg.layout().ty.kind() {
+        let tupled_arguments = match pack_arg.value.layout().ty.kind() {
             ty::Tuple(ref tupled_arguments) => tupled_arguments,
             _ => bug!("argument to function with \"rust-call\" ABI is not a tuple"),
         };
@@ -373,37 +381,53 @@ pub(crate) fn codegen_terminator_call<'tcx>(
         let mut args = Vec::with_capacity(1 + tupled_arguments.len());
         args.push(self_arg);
         for i in 0..tupled_arguments.len() {
-            args.push(pack_arg.value_field(fx, mir::Field::new(i)));
+            args.push(CallArgument {
+                value: pack_arg.value.value_field(fx, mir::Field::new(i)),
+                is_owned: pack_arg.is_owned,
+            });
         }
         args
     } else {
-        args.iter().map(|arg| codegen_operand(fx, arg)).collect::<Vec<_>>()
+        args.iter().map(|arg| codegen_call_argument_operand(fx, arg)).collect::<Vec<_>>()
     };

-    //   | indirect call target
-    //   |         | the first argument to be passed
-    //   v         v
-    let (func_ref, first_arg) = match instance {
+    // Pass the caller location for `#[track_caller]`.
+    if instance.map(|inst| inst.def.requires_caller_location(fx.tcx)).unwrap_or(false) {
+        let caller_location = fx.get_caller_location(span);
+        args.push(CallArgument { value: caller_location, is_owned: false });
+    }
+
+    let args = args;
+    assert_eq!(fn_abi.args.len(), args.len());
+
+    enum CallTarget {
+        Direct(FuncRef),
+        Indirect(SigRef, Value),
+    }
+
+    let (func_ref, first_arg_override) = match instance {
         // Trait object call
         Some(Instance { def: InstanceDef::Virtual(_, idx), .. }) => {
             if fx.clif_comments.enabled() {
                 let nop_inst = fx.bcx.ins().nop();
                 fx.add_comment(
                     nop_inst,
-                    format!("virtual call; self arg pass mode: {:?}", &fn_abi.args[0],),
+                    format!("virtual call; self arg pass mode: {:?}", &fn_abi.args[0]),
                 );
             }
-            let (ptr, method) = crate::vtable::get_ptr_and_method_ref(fx, args[0], idx);
-            (Some(method), smallvec![ptr])
+            let (ptr, method) = crate::vtable::get_ptr_and_method_ref(fx, args[0].value, idx);
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+            let sig = fx.bcx.import_signature(sig);
+            (CallTarget::Indirect(sig, method), Some(ptr))
         }

         // Normal call
-        Some(_) => (
-            None,
-            args.get(0)
-                .map(|arg| adjust_arg_for_abi(fx, *arg, &fn_abi.args[0]))
-                .unwrap_or(smallvec![]),
-        ),
+        Some(instance) => {
+            let func_ref = fx.get_function_ref(instance);
+            (CallTarget::Direct(func_ref), None)
+        }

         // Indirect call
         None => {
@@ -411,80 +435,64 @@ pub(crate) fn codegen_terminator_call<'tcx>(
                 let nop_inst = fx.bcx.ins().nop();
                 fx.add_comment(nop_inst, "indirect call");
             }
             let func = codegen_operand(fx, func).load_scalar(fx);
-            (
-                Some(func),
-                args.get(0)
-                    .map(|arg| adjust_arg_for_abi(fx, *arg, &fn_abi.args[0]))
-                    .unwrap_or(smallvec![]),
-            )
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+            let sig = fx.bcx.import_signature(sig);
+            (CallTarget::Indirect(sig, func), None)
         }
     };

     let ret_place = destination.map(|(place, _)| place);
-    let (call_inst, call_args) = self::returning::codegen_with_call_return_arg(
-        fx,
-        &fn_abi.ret,
-        ret_place,
-        |fx, return_ptr| {
-            let regular_args_count = args.len();
-            let mut call_args: Vec<Value> = return_ptr
-                .into_iter()
-                .chain(first_arg.into_iter())
-                .chain(
-                    args.into_iter()
-                        .enumerate()
-                        .skip(1)
-                        .map(|(i, arg)| adjust_arg_for_abi(fx, arg, &fn_abi.args[i]).into_iter())
-                        .flatten(),
-                )
-                .collect::<Vec<_>>();
-
-            if instance.map(|inst| inst.def.requires_caller_location(fx.tcx)).unwrap_or(false) {
-                // Pass the caller location for `#[track_caller]`.
-                let caller_location = fx.get_caller_location(span);
-                call_args.extend(
-                    adjust_arg_for_abi(fx, caller_location, &fn_abi.args[regular_args_count])
-                        .into_iter(),
-                );
-                assert_eq!(fn_abi.args.len(), regular_args_count + 1);
-            } else {
-                assert_eq!(fn_abi.args.len(), regular_args_count);
-            }
-
-            let call_inst = if let Some(func_ref) = func_ref {
-                let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
-                let sig = fx.bcx.import_signature(sig);
-                fx.bcx.ins().call_indirect(sig, func_ref, &call_args)
-            } else {
-                let func_ref =
-                    fx.get_function_ref(instance.expect("non-indirect call on non-FnDef type"));
-                fx.bcx.ins().call(func_ref, &call_args)
-            };
-
-            (call_inst, call_args)
-        },
-    );
-
-    // FIXME find a cleaner way to support varargs
-    if fn_sig.c_variadic {
-        if !matches!(fn_sig.abi, Abi::C { .. }) {
-            fx.tcx.sess.span_fatal(span, &format!("Variadic call for non-C abi {:?}", fn_sig.abi));
-        }
-        let sig_ref = fx.bcx.func.dfg.call_signature(call_inst).unwrap();
-        let abi_params = call_args
-            .into_iter()
-            .map(|arg| {
-                let ty = fx.bcx.func.dfg.value_type(arg);
-                if !ty.is_int() {
-                    // FIXME set %al to upperbound on float args once floats are supported
-                    fx.tcx.sess.span_fatal(span, &format!("Non int ty {:?} for variadic call", ty));
-                }
-                AbiParam::new(ty)
-            })
-            .collect::<Vec<AbiParam>>();
-        fx.bcx.func.dfg.signatures[sig_ref].params = abi_params;
-    }
+    self::returning::codegen_with_call_return_arg(fx, &fn_abi.ret, ret_place, |fx, return_ptr| {
+        let call_args = return_ptr
+            .into_iter()
+            .chain(first_arg_override.into_iter())
+            .chain(
+                args.into_iter()
+                    .enumerate()
+                    .skip(if first_arg_override.is_some() { 1 } else { 0 })
+                    .map(|(i, arg)| {
+                        adjust_arg_for_abi(fx, arg.value, &fn_abi.args[i], arg.is_owned).into_iter()
+                    })
+                    .flatten(),
+            )
+            .collect::<Vec<Value>>();
+
+        let call_inst = match func_ref {
+            CallTarget::Direct(func_ref) => fx.bcx.ins().call(func_ref, &call_args),
+            CallTarget::Indirect(sig, func_ptr) => {
+                fx.bcx.ins().call_indirect(sig, func_ptr, &call_args)
+            }
+        };
+
+        // FIXME find a cleaner way to support varargs
+        if fn_sig.c_variadic {
+            if !matches!(fn_sig.abi, Abi::C { .. }) {
+                fx.tcx
+                    .sess
+                    .span_fatal(span, &format!("Variadic call for non-C abi {:?}", fn_sig.abi));
+            }
+            let sig_ref = fx.bcx.func.dfg.call_signature(call_inst).unwrap();
+            let abi_params = call_args
+                .into_iter()
+                .map(|arg| {
+                    let ty = fx.bcx.func.dfg.value_type(arg);
+                    if !ty.is_int() {
+                        // FIXME set %al to upperbound on float args once floats are supported
+                        fx.tcx
+                            .sess
+                            .span_fatal(span, &format!("Non int ty {:?} for variadic call", ty));
+                    }
+                    AbiParam::new(ty)
+                })
+                .collect::<Vec<AbiParam>>();
+            fx.bcx.func.dfg.signatures[sig_ref].params = abi_params;
+        }
+
+        call_inst
+    });

     if let Some((_, dest)) = destination {
         let ret_block = fx.get_block(dest);
@@ -535,7 +543,7 @@ pub(crate) fn codegen_drop<'tcx>(
             TypeAndMut { ty, mutbl: crate::rustc_hir::Mutability::Mut },
         )),
     );
-    let arg_value = adjust_arg_for_abi(fx, arg_value, &fn_abi.args[0]);
+    let arg_value = adjust_arg_for_abi(fx, arg_value, &fn_abi.args[0], true);

     let mut call_args: Vec<Value> = arg_value.into_iter().collect::<Vec<_>>();
@@ -543,7 +551,7 @@ pub(crate) fn codegen_drop<'tcx>(
         // Pass the caller location for `#[track_caller]`.
         let caller_location = fx.get_caller_location(span);
         call_args.extend(
-            adjust_arg_for_abi(fx, caller_location, &fn_abi.args[1]).into_iter(),
+            adjust_arg_for_abi(fx, caller_location, &fn_abi.args[1], false).into_iter(),
         );
     }
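Note: `requires_caller_location` is true for `#[track_caller]` functions, which receive the caller's source position as an implicit trailing argument; that is the `caller_location` value appended to `call_args` above. The surface-language feature it implements (standalone example):

```rust
#[track_caller]
fn where_am_i() -> &'static std::panic::Location<'static> {
    // The implicit extra argument added by the codegen above surfaces here.
    std::panic::Location::caller()
}

fn main() {
    let loc = where_am_i();
    println!("called from {}:{}", loc.file(), loc.line());
}
```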


@@ -227,6 +227,7 @@ pub(super) fn adjust_arg_for_abi<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     arg: CValue<'tcx>,
     arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
+    is_owned: bool,
 ) -> SmallVec<[Value; 2]> {
     assert_assignable(fx, arg.layout().ty, arg_abi.layout.ty);
     match arg_abi.mode {
@@ -237,10 +238,21 @@ pub(super) fn adjust_arg_for_abi<'tcx>(
             smallvec![a, b]
         }
         PassMode::Cast(cast) => to_casted_value(fx, arg, cast),
-        PassMode::Indirect { .. } => match arg.force_stack(fx) {
-            (ptr, None) => smallvec![ptr.get_addr(fx)],
-            (ptr, Some(meta)) => smallvec![ptr.get_addr(fx), meta],
-        },
+        PassMode::Indirect { .. } => {
+            if is_owned {
+                match arg.force_stack(fx) {
+                    (ptr, None) => smallvec![ptr.get_addr(fx)],
+                    (ptr, Some(meta)) => smallvec![ptr.get_addr(fx), meta],
+                }
+            } else {
+                // Ownership of the value at the backing storage for an argument is passed to the
+                // callee per the ABI, so we must make a copy of the argument unless the argument
+                // local is moved.
+                let place = CPlace::new_stack_slot(fx, arg.layout());
+                place.write_cvalue(fx, arg);
+                smallvec![place.to_ptr().get_addr(fx)]
+            }
+        }
     }
 }
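Note: `is_owned` is derived from `Operand::Move` at the call site (see `codegen_call_argument_operand` earlier in the commit). A toy model of the rule in plain Rust, with `callee_owns` standing in for a callee that, per the ABI, may consume the storage behind the pointer (illustrative only):

```rust
fn callee_owns(buf: &mut Vec<u8>) {
    buf.clear(); // the ABI lets the callee treat this storage as its own
}

fn main() {
    let arg = vec![1, 2, 3];

    // Copy-like argument: `arg` is still needed afterwards, so the caller
    // hands the callee a temporary copy (the stack slot in the code above).
    let mut tmp = arg.clone();
    callee_owns(&mut tmp);
    assert_eq!(arg, [1, 2, 3]); // the original is untouched

    // Move-like argument: `arg` is dead after the call, so its own backing
    // storage can be handed over without any copy.
    let mut moved = arg;
    callee_owns(&mut moved);
    assert!(moved.is_empty());
}
```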


@@ -2,54 +2,9 @@
 use crate::prelude::*;

 use rustc_middle::ty::layout::FnAbiExt;
-use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
+use rustc_target::abi::call::{ArgAbi, PassMode};
 use smallvec::{smallvec, SmallVec};

-/// Can the given type be returned into an ssa var or does it need to be returned on the stack.
-pub(crate) fn can_return_to_ssa_var<'tcx>(
-    fx: &FunctionCx<'_, '_, 'tcx>,
-    func: &mir::Operand<'tcx>,
-    args: &[mir::Operand<'tcx>],
-) -> bool {
-    let fn_ty = fx.monomorphize(func.ty(fx.mir, fx.tcx));
-    let fn_sig =
-        fx.tcx.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), fn_ty.fn_sig(fx.tcx));
-
-    // Handle special calls like instrinsics and empty drop glue.
-    let instance = if let ty::FnDef(def_id, substs) = *fn_ty.kind() {
-        let instance = ty::Instance::resolve(fx.tcx, ty::ParamEnv::reveal_all(), def_id, substs)
-            .unwrap()
-            .unwrap()
-            .polymorphize(fx.tcx);
-
-        match instance.def {
-            InstanceDef::Intrinsic(_) | InstanceDef::DropGlue(_, _) => {
-                return true;
-            }
-            _ => Some(instance),
-        }
-    } else {
-        None
-    };
-
-    let extra_args = &args[fn_sig.inputs().len()..];
-    let extra_args = extra_args
-        .iter()
-        .map(|op_arg| fx.monomorphize(op_arg.ty(fx.mir, fx.tcx)))
-        .collect::<Vec<_>>();
-    let fn_abi = if let Some(instance) = instance {
-        FnAbi::of_instance(&RevealAllLayoutCx(fx.tcx), instance, &extra_args)
-    } else {
-        FnAbi::of_fn_ptr(&RevealAllLayoutCx(fx.tcx), fn_ty.fn_sig(fx.tcx), &extra_args)
-    };
-    match fn_abi.ret.mode {
-        PassMode::Ignore | PassMode::Direct(_) | PassMode::Pair(_, _) => true,
-        // FIXME Make it possible to return Cast and Indirect to an ssa var.
-        PassMode::Cast(_) | PassMode::Indirect { .. } => false,
-    }
-}
-
 /// Return a place where the return value of the current function can be written to. If necessary
 /// this adds an extra parameter pointing to where the return value needs to be stored.
 pub(super) fn codegen_return_param<'tcx>(
@@ -58,8 +13,7 @@ pub(super) fn codegen_return_param<'tcx>(
     block_params_iter: &mut impl Iterator<Item = Value>,
 ) -> CPlace<'tcx> {
     let (ret_place, ret_param): (_, SmallVec<[_; 2]>) = match fx.fn_abi.as_ref().unwrap().ret.mode {
-        PassMode::Ignore => (CPlace::no_place(fx.fn_abi.as_ref().unwrap().ret.layout), smallvec![]),
-        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => {
+        PassMode::Ignore | PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => {
             let is_ssa = ssa_analyzed[RETURN_PLACE] == crate::analyze::SsaKind::Ssa;
             (
                 super::make_local_place(
@@ -73,7 +27,7 @@ pub(super) fn codegen_return_param<'tcx>(
         }
         PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => {
             let ret_param = block_params_iter.next().unwrap();
-            assert_eq!(fx.bcx.func.dfg.value_type(ret_param), pointer_ty(fx.tcx));
+            assert_eq!(fx.bcx.func.dfg.value_type(ret_param), fx.pointer_type);
             (
                 CPlace::for_ptr(Pointer::new(ret_param), fx.fn_abi.as_ref().unwrap().ret.layout),
                 smallvec![ret_param],
@@ -99,25 +53,33 @@ pub(super) fn codegen_return_param<'tcx>(
 /// Invokes the closure with if necessary a value representing the return pointer. When the closure
 /// returns the call return value(s) if any are written to the correct place.
-pub(super) fn codegen_with_call_return_arg<'tcx, T>(
+pub(super) fn codegen_with_call_return_arg<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     ret_arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
     ret_place: Option<CPlace<'tcx>>,
-    f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> (Inst, T),
-) -> (Inst, T) {
-    let return_ptr = match ret_arg_abi.mode {
-        PassMode::Ignore => None,
+    f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> Inst,
+) {
+    let (ret_temp_place, return_ptr) = match ret_arg_abi.mode {
+        PassMode::Ignore => (None, None),
         PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => match ret_place {
-            Some(ret_place) => Some(ret_place.to_ptr().get_addr(fx)),
-            None => Some(fx.bcx.ins().iconst(fx.pointer_type, 43)), // FIXME allocate temp stack slot
+            Some(ret_place) if matches!(ret_place.inner(), CPlaceInner::Addr(_, None)) => {
+                // This is an optimization to prevent unnecessary copies of the return value when
+                // the return place is already a memory place as opposed to a register.
+                // This match arm can be safely removed.
+                (None, Some(ret_place.to_ptr().get_addr(fx)))
+            }
+            _ => {
+                let place = CPlace::new_stack_slot(fx, ret_arg_abi.layout);
+                (Some(place), Some(place.to_ptr().get_addr(fx)))
+            }
         },
         PassMode::Indirect { attrs: _, extra_attrs: Some(_), on_stack: _ } => {
             unreachable!("unsized return value")
         }
-        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => None,
+        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => (None, None),
     };

-    let (call_inst, meta) = f(fx, return_ptr);
+    let call_inst = f(fx, return_ptr);

     match ret_arg_abi.mode {
         PassMode::Ignore => {}
@@ -150,13 +112,19 @@ pub(super) fn codegen_with_call_return_arg<'tcx, T>(
                 ret_place.write_cvalue(fx, result);
             }
         }
-        PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => {}
+        PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => {
+            if let (Some(ret_place), Some(ret_temp_place)) = (ret_place, ret_temp_place) {
+                // Both ret_place and ret_temp_place must be Some. If ret_place is None, this is
+                // a non-returning call. If ret_temp_place is None, it is not necessary to copy the
+                // return value.
+                let ret_temp_value = ret_temp_place.to_cvalue(fx);
+                ret_place.write_cvalue(fx, ret_temp_value);
+            }
+        }
         PassMode::Indirect { attrs: _, extra_attrs: Some(_), on_stack: _ } => {
             unreachable!("unsized return value")
         }
     }
-
-    (call_inst, meta)
 }

 /// Codegen a return instruction with the right return value(s) if any.


@@ -5,7 +5,6 @@ use crate::prelude::*;
 use cranelift_codegen::binemit::{NullStackMapSink, NullTrapSink};
 use rustc_ast::expand::allocator::{AllocatorKind, AllocatorTy, ALLOCATOR_METHODS};
-use rustc_span::symbol::sym;

 /// Returns whether an allocator shim was created
 pub(crate) fn codegen(
@@ -20,7 +19,7 @@ pub(crate) fn codegen(
     if any_dynamic_crate {
         false
     } else if let Some(kind) = tcx.allocator_kind(()) {
-        codegen_inner(module, unwind_context, kind);
+        codegen_inner(module, unwind_context, kind, tcx.lang_items().oom().is_some());
         true
     } else {
         false
@@ -31,6 +30,7 @@ fn codegen_inner(
     module: &mut impl Module,
     unwind_context: &mut UnwindContext,
     kind: AllocatorKind,
+    has_alloc_error_handler: bool,
 ) {
     let usize_ty = module.target_config().pointer_type();
@@ -65,7 +65,6 @@ fn codegen_inner(
         let caller_name = format!("__rust_{}", method.name);
         let callee_name = kind.fn_name(method.name);
-        //eprintln!("Codegen allocator shim {} -> {} ({:?} -> {:?})", caller_name, callee_name, sig.params, sig.returns);

         let func_id = module.declare_function(&caller_name, Linkage::Export, &sig).unwrap();
@@ -104,13 +103,12 @@ fn codegen_inner(
         returns: vec![],
     };

-    let callee_name = kind.fn_name(sym::oom);
-    //eprintln!("Codegen allocator shim {} -> {} ({:?} -> {:?})", caller_name, callee_name, sig.params, sig.returns);
+    let callee_name = if has_alloc_error_handler { "__rg_oom" } else { "__rdl_oom" };

     let func_id =
         module.declare_function("__rust_alloc_error_handler", Linkage::Export, &sig).unwrap();
-    let callee_func_id = module.declare_function(&callee_name, Linkage::Import, &sig).unwrap();
+    let callee_func_id = module.declare_function(callee_name, Linkage::Import, &sig).unwrap();

     let mut ctx = Context::new();
     ctx.func = Function::with_name_signature(ExternalName::user(0, 0), sig);


@@ -38,17 +38,6 @@ pub(crate) fn analyze(fx: &FunctionCx<'_, '_, '_>) -> IndexVec<Local, SsaKind> {
                 _ => {}
             }
         }
-
-        match &bb.terminator().kind {
-            TerminatorKind::Call { destination, func, args, .. } => {
-                if let Some((dest_place, _dest_bb)) = destination {
-                    if !crate::abi::can_return_to_ssa_var(fx, func, args) {
-                        not_ssa(&mut flag_map, dest_place.local)
-                    }
-                }
-            }
-            _ => {}
-        }
     }

     flag_map


@@ -334,8 +334,6 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, '_>) {
                 crate::optimize::peephole::maybe_unwrap_bool_not(&mut fx.bcx, discr);
             let test_zero = if is_inverted { !test_zero } else { test_zero };
             let discr = crate::optimize::peephole::maybe_unwrap_bint(&mut fx.bcx, discr);
-            let discr =
-                crate::optimize::peephole::make_branchable_value(&mut fx.bcx, discr);
             if let Some(taken) = crate::optimize::peephole::maybe_known_branch_taken(
                 &fx.bcx, discr, test_zero,
             ) {


@@ -14,21 +14,6 @@ pub(crate) fn clif_intcast(
         (_, _) if from == to => val,

         // extend
-        (_, types::I128) => {
-            let lo = if from == types::I64 {
-                val
-            } else if signed {
-                fx.bcx.ins().sextend(types::I64, val)
-            } else {
-                fx.bcx.ins().uextend(types::I64, val)
-            };
-            let hi = if signed {
-                fx.bcx.ins().sshr_imm(lo, 63)
-            } else {
-                fx.bcx.ins().iconst(types::I64, 0)
-            };
-            fx.bcx.ins().iconcat(lo, hi)
-        }
         (_, _) if to.wider_or_equal(from) => {
             if signed {
                 fx.bcx.ins().sextend(to, val)
@@ -38,10 +23,6 @@ pub(crate) fn clif_intcast(
         }

         // reduce
-        (types::I128, _) => {
-            let (lsb, _msb) = fx.bcx.ins().isplit(val);
-            if to == types::I64 { lsb } else { fx.bcx.ins().ireduce(to, lsb) }
-        }
         (_, _) => fx.bcx.ins().ireduce(to, val),
     }
 }
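Note: the dedicated 128-bit arms could be deleted because Cranelift now legalizes `sextend`, `uextend`, and `ireduce` for `i128` itself. The semantics being lowered are ordinary Rust integer casts:

```rust
fn main() {
    // extend
    assert_eq!(-1i64 as i128, -1); // sextend replicates the sign bit
    assert_eq!(u64::MAX as u128, 0xffff_ffff_ffff_ffff); // uextend zero-fills
    // reduce
    assert_eq!(0x1_0000_0000_0000_0002_u128 as u64, 2); // ireduce keeps the low bits
}
```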


@@ -19,9 +19,6 @@ pub(crate) fn maybe_codegen<'tcx>(
         return None;
     }

-    let lhs_val = lhs.load_scalar(fx);
-    let rhs_val = rhs.load_scalar(fx);
-
     let is_signed = type_sign(lhs.layout().ty);

     match bin_op {
@@ -30,29 +27,53 @@ pub(crate) fn maybe_codegen<'tcx>(
             None
         }
         BinOp::Add | BinOp::Sub if !checked => None,
-        BinOp::Mul if !checked => {
-            let val_ty = if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 };
-            if fx.tcx.sess.target.is_like_windows {
-                let ret_place = CPlace::new_stack_slot(fx, lhs.layout());
-                let (lhs_ptr, lhs_extra) = lhs.force_stack(fx);
-                let (rhs_ptr, rhs_extra) = rhs.force_stack(fx);
-                assert!(lhs_extra.is_none());
-                assert!(rhs_extra.is_none());
-                let args =
-                    [ret_place.to_ptr().get_addr(fx), lhs_ptr.get_addr(fx), rhs_ptr.get_addr(fx)];
-                fx.lib_call(
-                    "__multi3",
-                    vec![
-                        AbiParam::special(pointer_ty(fx.tcx), ArgumentPurpose::StructReturn),
-                        AbiParam::new(pointer_ty(fx.tcx)),
-                        AbiParam::new(pointer_ty(fx.tcx)),
-                    ],
-                    vec![],
-                    &args,
-                );
-                Some(ret_place.to_cvalue(fx))
-            } else {
-                Some(fx.easy_call("__multi3", &[lhs, rhs], val_ty))
-            }
-        }
+        BinOp::Mul if !checked || is_signed => {
+            if !checked {
+                let val_ty = if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 };
+                if fx.tcx.sess.target.is_like_windows {
+                    let ret_place = CPlace::new_stack_slot(fx, lhs.layout());
+                    let (lhs_ptr, lhs_extra) = lhs.force_stack(fx);
+                    let (rhs_ptr, rhs_extra) = rhs.force_stack(fx);
+                    assert!(lhs_extra.is_none());
+                    assert!(rhs_extra.is_none());
+                    let args = [
+                        ret_place.to_ptr().get_addr(fx),
+                        lhs_ptr.get_addr(fx),
+                        rhs_ptr.get_addr(fx),
+                    ];
+                    fx.lib_call(
+                        "__multi3",
+                        vec![
+                            AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
+                            AbiParam::new(fx.pointer_type),
+                            AbiParam::new(fx.pointer_type),
+                        ],
+                        vec![],
+                        &args,
+                    );
+                    Some(ret_place.to_cvalue(fx))
+                } else {
+                    Some(fx.easy_call("__multi3", &[lhs, rhs], val_ty))
+                }
+            } else {
+                let out_ty = fx.tcx.mk_tup([lhs.layout().ty, fx.tcx.types.bool].iter());
+                let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32));
+                let lhs = lhs.load_scalar(fx);
+                let rhs = rhs.load_scalar(fx);
+                let oflow_ptr = oflow.to_ptr().get_addr(fx);
+                let res = fx.lib_call(
+                    "__muloti4",
+                    vec![
+                        AbiParam::new(types::I128),
+                        AbiParam::new(types::I128),
+                        AbiParam::new(fx.pointer_type),
+                    ],
+                    vec![AbiParam::new(types::I128)],
+                    &[lhs, rhs, oflow_ptr],
+                )[0];
+                let oflow = oflow.to_cvalue(fx).load_scalar(fx);
+                let oflow = fx.bcx.ins().ireduce(types::I8, oflow);
+                Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty)))
+            }
+        }
         BinOp::Add | BinOp::Sub | BinOp::Mul => {
@@ -66,16 +87,16 @@ pub(crate) fn maybe_codegen<'tcx>(
                 assert!(rhs_extra.is_none());
                 (
                     vec![
-                        AbiParam::special(pointer_ty(fx.tcx), ArgumentPurpose::StructReturn),
-                        AbiParam::new(pointer_ty(fx.tcx)),
-                        AbiParam::new(pointer_ty(fx.tcx)),
+                        AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
+                        AbiParam::new(fx.pointer_type),
+                        AbiParam::new(fx.pointer_type),
                     ],
                     [out_place.to_ptr().get_addr(fx), lhs_ptr.get_addr(fx), rhs_ptr.get_addr(fx)],
                 )
             } else {
                 (
                     vec![
-                        AbiParam::special(pointer_ty(fx.tcx), ArgumentPurpose::StructReturn),
+                        AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
                         AbiParam::new(types::I128),
                         AbiParam::new(types::I128),
                     ],
@@ -88,7 +109,6 @@ pub(crate) fn maybe_codegen<'tcx>(
                 (BinOp::Sub, false) => "__rust_u128_subo",
                 (BinOp::Sub, true) => "__rust_i128_subo",
                 (BinOp::Mul, false) => "__rust_u128_mulo",
-                (BinOp::Mul, true) => "__rust_i128_mulo",
                 _ => unreachable!(),
             };
             fx.lib_call(name, param_types, vec![], &args);
@@ -112,7 +132,7 @@ pub(crate) fn maybe_codegen<'tcx>(
             let args = [lhs_ptr.get_addr(fx), rhs_ptr.get_addr(fx)];
             let ret = fx.lib_call(
                 name,
-                vec![AbiParam::new(pointer_ty(fx.tcx)), AbiParam::new(pointer_ty(fx.tcx))],
+                vec![AbiParam::new(fx.pointer_type), AbiParam::new(fx.pointer_type)],
                 vec![AbiParam::new(types::I64X2)],
                 &args,
             )[0];
@@ -128,40 +148,6 @@ pub(crate) fn maybe_codegen<'tcx>(
             assert!(!checked);
             None
         }
-        BinOp::Shl | BinOp::Shr => {
-            let is_overflow = if checked {
-                // rhs >= 128
-                // FIXME support non 128bit rhs
-                /*let (rhs_lsb, rhs_msb) = fx.bcx.ins().isplit(rhs_val);
-                let rhs_msb_gt_0 = fx.bcx.ins().icmp_imm(IntCC::NotEqual, rhs_msb, 0);
-                let rhs_lsb_ge_128 = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, rhs_lsb, 127);
-                let is_overflow = fx.bcx.ins().bor(rhs_msb_gt_0, rhs_lsb_ge_128);*/
-                let is_overflow = fx.bcx.ins().bconst(types::B1, false);
-                Some(fx.bcx.ins().bint(types::I8, is_overflow))
-            } else {
-                None
-            };
-            let truncated_rhs = clif_intcast(fx, rhs_val, types::I32, false);
-            let val = match bin_op {
-                BinOp::Shl => fx.bcx.ins().ishl(lhs_val, truncated_rhs),
-                BinOp::Shr => {
-                    if is_signed {
-                        fx.bcx.ins().sshr(lhs_val, truncated_rhs)
-                    } else {
-                        fx.bcx.ins().ushr(lhs_val, truncated_rhs)
-                    }
-                }
-                _ => unreachable!(),
-            };
-            if let Some(is_overflow) = is_overflow {
-                let out_ty = fx.tcx.mk_tup([lhs.layout().ty, fx.tcx.types.bool].iter());
-                Some(CValue::by_val_pair(val, is_overflow, fx.layout_of(out_ty)))
-            } else {
-                Some(CValue::by_val(val, lhs.layout()))
-            }
-        }
+        BinOp::Shl | BinOp::Shr => None,
     }
 }
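Note: `__muloti4` is compiler-rt's signed 128-bit multiply-with-overflow routine; the new code above returns the wrapped product plus an `i8` overflow flag. In surface Rust the same pair comes from `overflowing_mul` (standalone example):

```rust
fn main() {
    let (res, oflow) = i128::MAX.overflowing_mul(2);
    assert!(oflow);
    assert_eq!(res, -2); // two's-complement wrapped product

    let (res, oflow) = 3i128.overflowing_mul(4);
    assert_eq!((res, oflow), (12, false));
}
```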


@@ -160,7 +160,7 @@ impl Writer for WriterRelocate {
             let val = match eh_pe.application() {
                 gimli::DW_EH_PE_absptr => val,
                 gimli::DW_EH_PE_pcrel => {
-                    // TODO: better handling of sign
+                    // FIXME better handling of sign
                     let offset = self.len() as u64;
                     offset.wrapping_sub(val)
                 }


@@ -46,7 +46,7 @@ impl<'tcx> DebugContext<'tcx> {
     pub(crate) fn new(tcx: TyCtxt<'tcx>, isa: &dyn TargetIsa) -> Self {
         let encoding = Encoding {
             format: Format::Dwarf32,
-            // TODO: this should be configurable
+            // FIXME this should be configurable
             // macOS doesn't seem to support DWARF > 3
             // 5 version is required for md5 file hash
             version: if tcx.sess.target.is_like_osx {


@@ -175,12 +175,11 @@ fn simd_for_each_lane<'tcx>(
     assert_eq!(lane_count, ret_lane_count);

     for lane_idx in 0..lane_count {
-        let lane_idx = mir::Field::new(lane_idx.try_into().unwrap());
-        let lane = val.value_field(fx, lane_idx).load_scalar(fx);
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);

         let res_lane = f(fx, lane_layout, ret_lane_layout, lane);

-        ret.place_field(fx, lane_idx).write_cvalue(fx, res_lane);
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
 }
@@ -206,20 +205,20 @@ fn simd_pair_for_each_lane<'tcx>(
     let ret_lane_layout = fx.layout_of(ret_lane_ty);
     assert_eq!(lane_count, ret_lane_count);

-    for lane in 0..lane_count {
-        let lane = mir::Field::new(lane.try_into().unwrap());
-        let x_lane = x.value_field(fx, lane).load_scalar(fx);
-        let y_lane = y.value_field(fx, lane).load_scalar(fx);
+    for lane_idx in 0..lane_count {
+        let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
+        let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);

         let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane);

-        ret.place_field(fx, lane).write_cvalue(fx, res_lane);
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
 }

 fn simd_reduce<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
+    acc: Option<Value>,
     ret: CPlace<'tcx>,
     f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
 ) {
@@ -227,16 +226,17 @@ fn simd_reduce<'tcx>(
     let lane_layout = fx.layout_of(lane_ty);
     assert_eq!(lane_layout, ret.layout());

-    let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
-    for lane_idx in 1..lane_count {
-        let lane =
-            val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx);
+    let (mut res_val, start_lane) =
+        if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) };
+    for lane_idx in start_lane..lane_count {
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
         res_val = f(fx, lane_layout, res_val, lane);
     }
     let res = CValue::by_val(res_val, lane_layout);
     ret.write_cvalue(fx, res);
 }

+// FIXME move all uses to `simd_reduce`
 fn simd_reduce_bool<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
@@ -246,14 +246,18 @@ fn simd_reduce_bool<'tcx>(
     let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     assert!(ret.layout().ty.is_bool());

-    let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    let res_val = val.value_lane(fx, 0).load_scalar(fx);
     let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
     for lane_idx in 1..lane_count {
-        let lane =
-            val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx);
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
         let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
         res_val = f(fx, res_val, lane);
     }
+    let res_val = if fx.bcx.func.dfg.value_type(res_val) != types::I8 {
+        fx.bcx.ins().ireduce(types::I8, res_val)
+    } else {
+        res_val
+    };
     let res = CValue::by_val(res_val, ret.layout());
     ret.write_cvalue(fx, res);
 }
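Note: `simd_reduce` above now takes an optional accumulator, so the `*_ordered` reductions can seed the fold with the intrinsic's extra `acc` operand instead of lane 0. The shape of the computation on plain arrays (standalone sketch):

```rust
fn main() {
    let lanes = [1.0f32, 2.0, 3.0, 4.0];

    // Some(acc): the fold starts from `acc` and includes lane 0.
    let with_acc = lanes.iter().fold(10.0f32, |a, &b| a + b);
    assert_eq!(with_acc, 20.0);

    // None: lane 0 seeds the fold and the remaining lanes are combined in.
    let without = lanes[1..].iter().fold(lanes[0], |a, &b| a + b);
    assert_eq!(without, 10.0);
}
```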
@@ -288,7 +292,11 @@ macro simd_cmp {
         if let Some(vector_ty) = vector_ty {
             let x = $x.load_scalar($fx);
             let y = $y.load_scalar($fx);
-            let val = $fx.bcx.ins().icmp(IntCC::$cc, x, y);
+            let val = if vector_ty.lane_type().is_float() {
+                $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y)
+            } else {
+                $fx.bcx.ins().icmp(IntCC::$cc, x, y)
+            };

             // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1.
             let val = $fx.bcx.ins().raw_bitcast(vector_ty, val);
@@ -603,9 +611,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             let (val, has_overflow) = checked_res.load_scalar_pair(fx);
             let clif_ty = fx.clif_type(T).unwrap();

-            // `select.i8` is not implemented by Cranelift.
-            let has_overflow = fx.bcx.ins().uextend(types::I32, has_overflow);
-
             let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);

             let val = match (intrinsic, signed) {
@@ -632,21 +637,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        };
        rotate_left, <T>(v x, v y) {
            let layout = fx.layout_of(T);
-            let y = if fx.bcx.func.dfg.value_type(y) == types::I128 {
-                fx.bcx.ins().ireduce(types::I64, y)
-            } else {
-                y
-            };
            let res = fx.bcx.ins().rotl(x, y);
            ret.write_cvalue(fx, CValue::by_val(res, layout));
        };
        rotate_right, <T>(v x, v y) {
            let layout = fx.layout_of(T);
-            let y = if fx.bcx.func.dfg.value_type(y) == types::I128 {
-                fx.bcx.ins().ireduce(types::I64, y)
-            } else {
-                y
-            };
            let res = fx.bcx.ins().rotr(x, y);
            ret.write_cvalue(fx, CValue::by_val(res, layout));
        };
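Note: the deleted `ireduce` was a workaround for Cranelift's `rotl`/`rotr` not accepting an `i128` shift amount; with native 128-bit support the value passes through unchanged. The intrinsic's semantics, where the amount is taken modulo the bit width:

```rust
fn main() {
    assert_eq!(1u128.rotate_left(130), 4); // 130 % 128 == 2
    assert_eq!(0b0000_1001u8.rotate_right(1), 0b1000_0100);
}
```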
@@ -684,35 +679,13 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        };
        ctlz | ctlz_nonzero, <T> (v arg) {
            // FIXME trap on `ctlz_nonzero` with zero arg.
-            let res = if T == fx.tcx.types.u128 || T == fx.tcx.types.i128 {
-                // FIXME verify this algorithm is correct
-                let (lsb, msb) = fx.bcx.ins().isplit(arg);
-                let lsb_lz = fx.bcx.ins().clz(lsb);
-                let msb_lz = fx.bcx.ins().clz(msb);
-                let msb_is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, msb, 0);
-                let lsb_lz_plus_64 = fx.bcx.ins().iadd_imm(lsb_lz, 64);
-                let res = fx.bcx.ins().select(msb_is_zero, lsb_lz_plus_64, msb_lz);
-                fx.bcx.ins().uextend(types::I128, res)
-            } else {
-                fx.bcx.ins().clz(arg)
-            };
+            let res = fx.bcx.ins().clz(arg);
            let res = CValue::by_val(res, fx.layout_of(T));
            ret.write_cvalue(fx, res);
        };
        cttz | cttz_nonzero, <T> (v arg) {
            // FIXME trap on `cttz_nonzero` with zero arg.
-            let res = if T == fx.tcx.types.u128 || T == fx.tcx.types.i128 {
-                // FIXME verify this algorithm is correct
-                let (lsb, msb) = fx.bcx.ins().isplit(arg);
-                let lsb_tz = fx.bcx.ins().ctz(lsb);
-                let msb_tz = fx.bcx.ins().ctz(msb);
-                let lsb_is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, lsb, 0);
-                let msb_tz_plus_64 = fx.bcx.ins().iadd_imm(msb_tz, 64);
-                let res = fx.bcx.ins().select(lsb_is_zero, msb_tz_plus_64, lsb_tz);
-                fx.bcx.ins().uextend(types::I128, res)
-            } else {
-                fx.bcx.ins().ctz(arg)
-            };
+            let res = fx.bcx.ins().ctz(arg);
            let res = CValue::by_val(res, fx.layout_of(T));
            ret.write_cvalue(fx, res);
        };
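Note: the removed `isplit`-based sequences, self-flagged with `// FIXME verify this algorithm is correct`, combined two 64-bit counts to emulate 128-bit `clz`/`ctz`; Cranelift now provides these natively. The expected results, via the matching Rust methods:

```rust
fn main() {
    let x: u128 = 1 << 100;
    assert_eq!(x.leading_zeros(), 27); // 127 - 100
    assert_eq!(x.trailing_zeros(), 100);
}
```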
@@ -995,8 +968,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
            let old = CValue::by_val(old, layout);
            ret.write_cvalue(fx, old);
        };
-
-        // FIXME https://github.com/bytecodealliance/wasmtime/issues/2647
        _ if intrinsic.as_str().starts_with("atomic_nand"), (v ptr, c src) {
            let layout = src.layout();
            validate_atomic_type!(fx, intrinsic, span, layout.ty);
@@ -1058,23 +1029,39 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
            ret.write_cvalue(fx, old);
        };

+        // In Rust floating point min and max don't propagate NaN. In Cranelift they do however.
+        // For this reason it is necessary to use `a.is_nan() ? b : (a >= b ? b : a)` for `minnumf*`
+        // and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. NaN checks are done by comparing
+        // a float against itself. Only in case of NaN is it not equal to itself.
        minnumf32, (v a, v b) {
-            let val = fx.bcx.ins().fmin(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_ge_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
            ret.write_cvalue(fx, val);
        };
        minnumf64, (v a, v b) {
-            let val = fx.bcx.ins().fmin(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_ge_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
            ret.write_cvalue(fx, val);
        };
        maxnumf32, (v a, v b) {
-            let val = fx.bcx.ins().fmax(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_le_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
            ret.write_cvalue(fx, val);
        };
        maxnumf64, (v a, v b) {
-            let val = fx.bcx.ins().fmax(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_le_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
            ret.write_cvalue(fx, val);
        };
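Note: Cranelift's `fmin`/`fmax` propagate NaN while Rust's `minnum*`/`maxnum*` intrinsics must not, which is exactly what the comparison-plus-select chain implements. The same semantics written directly in Rust (standalone sketch; `a != a` is the self-comparison NaN test used above):

```rust
fn minnum(a: f32, b: f32) -> f32 {
    // a.is_nan() ? b : (a >= b ? b : a)
    if a != a { b } else if a >= b { b } else { a }
}

fn main() {
    assert_eq!(minnum(f32::NAN, 1.0), 1.0); // NaN is discarded, not propagated
    assert_eq!(minnum(1.0, 2.0), 1.0);
    assert_eq!(f32::min(f32::NAN, 1.0), 1.0); // matches std's behavior
}
```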
@@ -1122,6 +1109,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        }

        let size = fx.layout_of(T).layout.size;
+        // FIXME add and use emit_small_memcmp
        let is_eq_value =
            if size == Size::ZERO {
                // No bytes means they're trivially equal
@@ -1137,10 +1125,9 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
            } else {
                // Just call `memcmp` (like slices do in core) when the
                // size is too large or it's not a power-of-two.
-                let ptr_ty = pointer_ty(fx.tcx);
                let signed_bytes = i64::try_from(size.bytes()).unwrap();
-                let bytes_val = fx.bcx.ins().iconst(ptr_ty, signed_bytes);
-                let params = vec![AbiParam::new(ptr_ty); 3];
+                let bytes_val = fx.bcx.ins().iconst(fx.pointer_type, signed_bytes);
+                let params = vec![AbiParam::new(fx.pointer_type); 3];
                let returns = vec![AbiParam::new(types::I32)];
                let args = &[lhs_ref, rhs_ref, bytes_val];
                let cmp = fx.lib_call("memcmp", params, returns, args)[0];


@@ -108,11 +108,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        for (out_idx, in_idx) in indexes.into_iter().enumerate() {
            let in_lane = if u64::from(in_idx) < lane_count {
-                x.value_field(fx, mir::Field::new(in_idx.into()))
+                x.value_lane(fx, in_idx.into())
            } else {
-                y.value_field(fx, mir::Field::new(usize::from(in_idx) - usize::try_from(lane_count).unwrap()))
+                y.value_lane(fx, u64::from(in_idx) - lane_count)
            };
-            let out_lane = ret.place_field(fx, mir::Field::new(out_idx));
+            let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
            out_lane.write_cvalue(fx, in_lane);
        }
    };
@@ -163,10 +163,38 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
            fx.tcx.sess.span_fatal(fx.mir.span, &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count));
        }

-        let ret_lane = v.value_field(fx, mir::Field::new(idx.try_into().unwrap()));
+        let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
        ret.write_cvalue(fx, ret_lane);
    };

+    simd_neg, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+            let ret_lane = match lane_layout.ty.kind() {
+                ty::Int(_) => fx.bcx.ins().ineg(lane),
+                ty::Float(_) => fx.bcx.ins().fneg(lane),
+                _ => unreachable!(),
+            };
+            CValue::by_val(ret_lane, ret_lane_layout)
+        });
+    };
+
+    simd_fabs, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+            let ret_lane = fx.bcx.ins().fabs(lane);
+            CValue::by_val(ret_lane, ret_lane_layout)
+        });
+    };
+
+    simd_fsqrt, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+            let ret_lane = fx.bcx.ins().sqrt(lane);
+            CValue::by_val(ret_lane, ret_lane_layout)
+        });
+    };
+
    simd_add, (c x, c y) {
        validate_simd_type!(fx, intrinsic, span, x.layout().ty);
        simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
@@ -183,6 +211,29 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        validate_simd_type!(fx, intrinsic, span, x.layout().ty);
        simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
    };
+    simd_rem, (c x, c y) {
+        validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+        simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+            let res_lane = match lane_layout.ty.kind() {
+                ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
+                ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
+                ty::Float(FloatTy::F32) => fx.lib_call(
+                    "fmodf",
+                    vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                    vec![AbiParam::new(types::F32)],
+                    &[x_lane, y_lane],
+                )[0],
+                ty::Float(FloatTy::F64) => fx.lib_call(
+                    "fmod",
+                    vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+                    vec![AbiParam::new(types::F64)],
+                    &[x_lane, y_lane],
+                )[0],
+                _ => unreachable!("{:?}", lane_layout.ty),
+            };
+            CValue::by_val(res_lane, ret_lane_layout)
+        });
+    };
    simd_shl, (c x, c y) {
        validate_simd_type!(fx, intrinsic, span, x.layout().ty);
        simd_int_binop!(fx, ishl(x, y) -> ret);
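Note: in `simd_rem` above, integer lanes lower to `urem`/`srem` while float lanes call libm's `fmodf`/`fmod`, which is also what `%` means on Rust floats:

```rust
fn main() {
    assert_eq!(7u32 % 3, 1); // urem lane
    assert_eq!(-7i32 % 3, -1); // srem lane: the sign follows the dividend
    assert_eq!(5.5f32 % 2.0, 1.5); // fmodf lane
    assert_eq!(-5.5f64 % 2.0, -1.5); // fmod lane
}
```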
@@ -216,15 +267,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        let ret_lane_layout = fx.layout_of(ret_lane_ty);

        for lane in 0..lane_count {
-            let lane = mir::Field::new(lane.try_into().unwrap());
-            let a_lane = a.value_field(fx, lane).load_scalar(fx);
-            let b_lane = b.value_field(fx, lane).load_scalar(fx);
-            let c_lane = c.value_field(fx, lane).load_scalar(fx);
+            let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+            let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+            let c_lane = c.value_lane(fx, lane).load_scalar(fx);

            let mul_lane = fx.bcx.ins().fmul(a_lane, b_lane);
            let res_lane = CValue::by_val(fx.bcx.ins().fadd(mul_lane, c_lane), ret_lane_layout);

-            ret.place_field(fx, lane).write_cvalue(fx, res_lane);
+            ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
        }
    };
@@ -237,9 +287,52 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        simd_flt_binop!(fx, fmax(x, y) -> ret);
    };

-    simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
+    simd_round, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+            let res_lane = match lane_layout.ty.kind() {
+                ty::Float(FloatTy::F32) => fx.lib_call(
+                    "roundf",
+                    vec![AbiParam::new(types::F32)],
+                    vec![AbiParam::new(types::F32)],
+                    &[lane],
+                )[0],
+                ty::Float(FloatTy::F64) => fx.lib_call(
+                    "round",
+                    vec![AbiParam::new(types::F64)],
+                    vec![AbiParam::new(types::F64)],
+                    &[lane],
+                )[0],
+                _ => unreachable!("{:?}", lane_layout.ty),
+            };
+            CValue::by_val(res_lane, ret_lane_layout)
+        });
+    };
+    simd_ceil, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+            let ret_lane = fx.bcx.ins().ceil(lane);
+            CValue::by_val(ret_lane, ret_lane_layout)
+        });
+    };
+    simd_floor, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+            let ret_lane = fx.bcx.ins().floor(lane);
+            CValue::by_val(ret_lane, ret_lane_layout)
+        });
+    };
+    simd_trunc, (c a) {
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+            let ret_lane = fx.bcx.ins().trunc(lane);
+            CValue::by_val(ret_lane, ret_lane_layout)
+        });
+    };
+
+    simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-        simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+        simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
            if lane_layout.ty.is_floating_point() {
                fx.bcx.ins().fadd(a, b)
            } else {
@@ -248,9 +341,9 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        });
    };

-    simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
+    simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-        simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+        simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
            if lane_layout.ty.is_floating_point() {
                fx.bcx.ins().fmul(a, b)
            } else {
@@ -269,13 +362,70 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
    };

-    // simd_fabs
-    // simd_saturating_add
+    simd_reduce_and, (c v) {
+        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+        simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
+    };
+
+    simd_reduce_or, (c v) {
+        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+        simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
+    };
+
+    simd_reduce_xor, (c v) {
+        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+        simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
+    };
+
+    simd_reduce_min, (c v) {
+        // FIXME support floats
+        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+        simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+            let lt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
+                IntCC::SignedLessThan
+            } else {
+                IntCC::UnsignedLessThan
+            }, a, b);
+            fx.bcx.ins().select(lt, a, b)
+        });
+    };
+
+    simd_reduce_max, (c v) {
+        // FIXME support floats
+        validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+        simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+            let gt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
+                IntCC::SignedGreaterThan
+            } else {
+                IntCC::UnsignedGreaterThan
+            }, a, b);
+            fx.bcx.ins().select(gt, a, b)
+        });
+    };
+
+    simd_select, (c m, c a, c b) {
+        validate_simd_type!(fx, intrinsic, span, m.layout().ty);
+        validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+        assert_eq!(a.layout(), b.layout());
+
+        let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+
+        for lane in 0..lane_count {
+            let m_lane = m.value_lane(fx, lane).load_scalar(fx);
+            let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+            let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+
+            let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
+            let res_lane = CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);
+
+            ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
+        }
+    };
+
+    // simd_saturating_*
    // simd_bitmask
-    // simd_select
-    // simd_rem
-    // simd_neg
-    // simd_trunc
-    // simd_floor
    // simd_scatter
    // simd_gather
 }
 }
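Note: in `simd_select` each mask lane picks `a` when non-zero and `b` when zero; the `icmp_imm Equal 0` is why the `select` operands appear swapped. Per-lane semantics on plain arrays (standalone sketch):

```rust
fn main() {
    let m = [0u8, 1, 0, 255];
    let a = [1i32, 2, 3, 4];
    let b = [10i32, 20, 30, 40];
    let r: Vec<i32> = m
        .iter()
        .zip(a.iter().zip(b.iter()))
        .map(|(&m, (&a, &b))| if m == 0 { b } else { a })
        .collect();
    assert_eq!(r, [10, 2, 30, 4]);
}
```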


@@ -184,6 +184,9 @@ impl CodegenBackend for CraneliftCodegenBackend {
        let config = if let Some(config) = self.config.clone() {
            config
        } else {
+            if !tcx.sess.unstable_options() && !tcx.sess.opts.cg.llvm_args.is_empty() {
+                tcx.sess.fatal("`-Z unstable-options` must be passed to allow configuring cg_clif");
+            }
            BackendConfig::from_opts(&tcx.sess.opts.cg.llvm_args)
                .unwrap_or_else(|err| tcx.sess.fatal(&err))
        };
@@ -217,16 +220,15 @@ impl CodegenBackend for CraneliftCodegenBackend {
    ) -> Result<(), ErrorReported> {
        use rustc_codegen_ssa::back::link::link_binary;

-        link_binary::<crate::archive::ArArchiveBuilder<'_>>(
-            sess,
-            &codegen_results,
-            outputs,
-        )
+        link_binary::<crate::archive::ArArchiveBuilder<'_>>(sess, &codegen_results, outputs)
    }
 }

 fn target_triple(sess: &Session) -> target_lexicon::Triple {
-    sess.target.llvm_target.parse().unwrap()
+    match sess.target.llvm_target.parse() {
+        Ok(triple) => triple,
+        Err(err) => sess.fatal(&format!("target not recognized: {}", err)),
+    }
 }

 fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::TargetIsa + 'static> {
@@ -276,15 +278,21 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::Tar
        }
        Some(value) => {
            let mut builder =
-                cranelift_codegen::isa::lookup_variant(target_triple, variant).unwrap();
+                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant)
+                    .unwrap_or_else(|err| {
+                        sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
+                    });
            if let Err(_) = builder.enable(value) {
-                sess.fatal("The specified target cpu isn't currently supported by Cranelift.");
+                sess.fatal("the specified target cpu isn't currently supported by Cranelift.");
            }
            builder
        }
        None => {
            let mut builder =
-                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant).unwrap();
+                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant)
+                    .unwrap_or_else(|err| {
+                        sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
+                    });
            if target_triple.architecture == target_lexicon::Architecture::X86_64 {
                // Don't use "haswell" as the default, as it implies `has_lzcnt`.
                // macOS CI is still at Ivy Bridge EP, so `lzcnt` is interpreted as `bsr`.


@@ -67,19 +67,6 @@ pub(crate) fn codegen_binop<'tcx>(
            let lhs = in_lhs.load_scalar(fx);
            let rhs = in_rhs.load_scalar(fx);

-            let (lhs, rhs) = if (bin_op == BinOp::Eq || bin_op == BinOp::Ne)
-                && (in_lhs.layout().ty.kind() == fx.tcx.types.i8.kind()
-                    || in_lhs.layout().ty.kind() == fx.tcx.types.i16.kind())
-            {
-                // FIXME(CraneStation/cranelift#896) icmp_imm.i8/i16 with eq/ne for signed ints is implemented wrong.
-                (
-                    fx.bcx.ins().sextend(types::I32, lhs),
-                    fx.bcx.ins().sextend(types::I32, rhs),
-                )
-            } else {
-                (lhs, rhs)
-            };
-
            return codegen_compare_bin_op(fx, bin_op, signed, lhs, rhs);
        }
        _ => {}
@@ -293,9 +280,8 @@ pub(crate) fn codegen_checked_int_binop<'tcx>(
        }
        BinOp::Shl => {
            let lhs_ty = fx.bcx.func.dfg.value_type(lhs);
-            let actual_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
-            let actual_shift = clif_intcast(fx, actual_shift, types::I8, false);
-            let val = fx.bcx.ins().ishl(lhs, actual_shift);
+            let masked_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
+            let val = fx.bcx.ins().ishl(lhs, masked_shift);
            let ty = fx.bcx.func.dfg.value_type(val);
            let max_shift = i64::from(ty.bits()) - 1;
            let has_overflow = fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, rhs, max_shift);
@@ -303,12 +289,11 @@ pub(crate) fn codegen_checked_int_binop<'tcx>(
        }
        BinOp::Shr => {
            let lhs_ty = fx.bcx.func.dfg.value_type(lhs);
-            let actual_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
-            let actual_shift = clif_intcast(fx, actual_shift, types::I8, false);
+            let masked_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
            let val = if !signed {
-                fx.bcx.ins().ushr(lhs, actual_shift)
+                fx.bcx.ins().ushr(lhs, masked_shift)
            } else {
-                fx.bcx.ins().sshr(lhs, actual_shift)
+                fx.bcx.ins().sshr(lhs, masked_shift)
            };
            let ty = fx.bcx.func.dfg.value_type(val);
            let max_shift = i64::from(ty.bits()) - 1;
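Note: the shift amount is now only masked to `bits - 1` (the extra narrowing `clif_intcast` is gone), while the overflow flag still checks the unmasked amount. This matches Rust's `overflowing_shl`/`overflowing_shr`:

```rust
fn main() {
    // band_imm(rhs, bits - 1), ishl, then icmp_imm flags rhs > bits - 1.
    assert_eq!(1u32.overflowing_shl(33), (2, true)); // 33 & 31 == 1, and 33 > 31
    assert_eq!(1u32.overflowing_shl(1), (2, false));
}
```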


@@ -1,8 +1,6 @@
 //! Peephole optimizations that can be performed while creating clif ir.

-use cranelift_codegen::ir::{
-    condcodes::IntCC, types, InstBuilder, InstructionData, Opcode, Value, ValueDef,
-};
+use cranelift_codegen::ir::{condcodes::IntCC, InstructionData, Opcode, Value, ValueDef};
 use cranelift_frontend::FunctionBuilder;

 /// If the given value was produced by a `bint` instruction, return it's input, otherwise return the
@@ -37,43 +35,6 @@ pub(crate) fn maybe_unwrap_bool_not(bcx: &mut FunctionBuilder<'_>, arg: Value) -
    }
 }

-pub(crate) fn make_branchable_value(bcx: &mut FunctionBuilder<'_>, arg: Value) -> Value {
-    if bcx.func.dfg.value_type(arg).is_bool() {
-        return arg;
-    }
-
-    (|| {
-        let arg_inst = if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
-            arg_inst
-        } else {
-            return None;
-        };
-
-        match bcx.func.dfg[arg_inst] {
-            // This is the lowering of Rvalue::Not
-            InstructionData::Load { opcode: Opcode::Load, arg: ptr, flags, offset } => {
-                // Using `load.i8 + uextend.i32` would legalize to `uload8 + ireduce.i8 +
-                // uextend.i32`. Just `uload8` is much faster.
-                match bcx.func.dfg.ctrl_typevar(arg_inst) {
-                    types::I8 => Some(bcx.ins().uload8(types::I32, flags, ptr, offset)),
-                    types::I16 => Some(bcx.ins().uload16(types::I32, flags, ptr, offset)),
-                    _ => None,
-                }
-            }
-            _ => None,
-        }
-    })()
-    .unwrap_or_else(|| {
-        match bcx.func.dfg.value_type(arg) {
-            types::I8 | types::I16 => {
-                // WORKAROUND for brz.i8 and brnz.i8 not yet being implemented
-                bcx.ins().uextend(types::I32, arg)
-            }
-            _ => arg,
-        }
-    })
-}
-
 /// Returns whether the branch is statically known to be taken or `None` if it isn't statically known.
 pub(crate) fn maybe_known_branch_taken(
    bcx: &FunctionBuilder<'_>,


@@ -10,7 +10,7 @@ fn codegen_print(fx: &mut FunctionCx<'_, '_, '_>, msg: &str) {
            Linkage::Import,
            &Signature {
                call_conv: CallConv::triple_default(fx.triple()),
-                params: vec![AbiParam::new(pointer_ty(fx.tcx))],
+                params: vec![AbiParam::new(fx.pointer_type)],
                returns: vec![AbiParam::new(types::I32)],
            },
        )


@@ -77,12 +77,10 @@ fn unsize_ptr<'tcx>(
        (&ty::Ref(_, a, _), &ty::Ref(_, b, _))
        | (&ty::Ref(_, a, _), &ty::RawPtr(ty::TypeAndMut { ty: b, .. }))
        | (&ty::RawPtr(ty::TypeAndMut { ty: a, .. }), &ty::RawPtr(ty::TypeAndMut { ty: b, .. })) => {
-            assert!(!fx.layout_of(a).is_unsized());
            (src, unsized_info(fx, a, b, old_info))
        }
        (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) if def_a.is_box() && def_b.is_box() => {
            let (a, b) = (src_layout.ty.boxed_ty(), dst_layout.ty.boxed_ty());
-            assert!(!fx.layout_of(a).is_unsized());
            (src, unsized_info(fx, a, b, old_info))
        }
        (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => {


@@ -34,10 +34,10 @@ fn codegen_field<'tcx>(
                let (_, unsized_align) =
                    crate::unsize::size_and_align_of_dst(fx, field_layout, extra);

-                let one = fx.bcx.ins().iconst(pointer_ty(fx.tcx), 1);
+                let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
                let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
                let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
-                let zero = fx.bcx.ins().iconst(pointer_ty(fx.tcx), 0);
+                let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
                let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
                let offset = fx.bcx.ins().band(and_lhs, and_rhs);
@@ -206,6 +206,38 @@ impl<'tcx> CValue<'tcx> {
        }
    }

+    /// Like [`CValue::value_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn value_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+        match self.0 {
+            CValueInner::ByVal(val) => match layout.abi {
+                Abi::Vector { element: _, count: _ } => {
+                    assert!(lane_count <= u8::MAX.into(), "SIMD type with more than 255 lanes???");
+                    let lane_idx = u8::try_from(lane_idx).unwrap();
+                    let lane = fx.bcx.ins().extractlane(val, lane_idx);
+                    CValue::by_val(lane, lane_layout)
+                }
+                _ => unreachable!("value_lane for ByVal with abi {:?}", layout.abi),
+            },
+            CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
    pub(crate) fn unsize_value(self, fx: &mut FunctionCx<'_, '_, 'tcx>, dest: CPlace<'tcx>) {
        crate::unsize::coerce_unsized_into(fx, self, dest);
    }
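Note: in the `ByRef` arm above, lane `i` of a SIMD value sits at `base + lane_size * i`, while the `ByVal` arm uses Cranelift's `extractlane`. The byte-offset arithmetic, checked against an ordinary array (standalone sketch):

```rust
fn main() {
    let v: [u32; 4] = [10, 20, 30, 40];
    let lane_idx = 2;
    // field_offset = lane_layout.size * lane_idx, here 4 bytes per u32 lane.
    let lane_ptr = unsafe { v.as_ptr().add(lane_idx) };
    assert_eq!(unsafe { *lane_ptr }, 30);
}
```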
@@ -286,17 +318,16 @@ impl<'tcx> CPlace<'tcx> {
        &self.inner
    }

-    pub(crate) fn no_place(layout: TyAndLayout<'tcx>) -> CPlace<'tcx> {
-        CPlace { inner: CPlaceInner::Addr(Pointer::dangling(layout.align.pref), None), layout }
-    }
-
    pub(crate) fn new_stack_slot(
        fx: &mut FunctionCx<'_, '_, 'tcx>,
        layout: TyAndLayout<'tcx>,
    ) -> CPlace<'tcx> {
        assert!(!layout.is_unsized());
        if layout.size.bytes() == 0 {
-            return CPlace::no_place(layout);
+            return CPlace {
+                inner: CPlaceInner::Addr(Pointer::dangling(layout.align.pref), None),
+                layout,
+            };
        }

        let stack_slot = fx.bcx.create_stack_slot(StackSlotData {
@@ -610,6 +641,38 @@ impl<'tcx> CPlace<'tcx> {
        }
    }

+    /// Like [`CPlace::place_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn place_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CPlace<'tcx> {
+        let layout = self.layout();
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+
+        match self.inner {
+            CPlaceInner::Var(local, var) => {
+                assert!(matches!(layout.abi, Abi::Vector { .. }));
+                CPlace {
+                    inner: CPlaceInner::VarLane(local, var, lane_idx.try_into().unwrap()),
+                    layout: lane_layout,
+                }
+            }
+            CPlaceInner::VarPair(_, _, _) => unreachable!(),
+            CPlaceInner::VarLane(_, _, _) => unreachable!(),
+            CPlaceInner::Addr(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CPlace::for_ptr(field_ptr, lane_layout)
+            }
+            CPlaceInner::Addr(_, Some(_)) => unreachable!(),
+        }
+    }
+
    pub(crate) fn place_index(
        self,
        fx: &mut FunctionCx<'_, '_, 'tcx>,


@@ -14,7 +14,7 @@ pub(crate) fn vtable_memflags() -> MemFlags {
 pub(crate) fn drop_fn_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
    fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
        vtable_memflags(),
        vtable,
        (ty::COMMON_VTABLE_ENTRIES_DROPINPLACE * usize_size) as i32,
@@ -24,7 +24,7 @@ pub(crate) fn drop_fn_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) ->
 pub(crate) fn size_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
    fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
        vtable_memflags(),
        vtable,
        (ty::COMMON_VTABLE_ENTRIES_SIZE * usize_size) as i32,
@@ -34,7 +34,7 @@ pub(crate) fn size_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Val
 pub(crate) fn min_align_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
    fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
        vtable_memflags(),
        vtable,
        (ty::COMMON_VTABLE_ENTRIES_ALIGN * usize_size) as i32,
@@ -55,7 +55,7 @@ pub(crate) fn get_ptr_and_method_ref<'tcx>(
    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes();

    let func_ref = fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
        vtable_memflags(),
        vtable,
        (idx * usize_size as usize) as i32,