Merge commit '98ed962c7d' into master

2024-07-10 12:44:23 +02:00 · 2024-07-10 12:44:23 +02:00 · 7cbe50e209
commit 7cbe50e209
parent 649feb9c1a 98ed962c7d
68 changed files with 2682 additions and 1135 deletions
--- a/compiler/rustc_codegen_gcc/src/abi.rs
+++ b/compiler/rustc_codegen_gcc/src/abi.rs
@@ -4,6 +4,7 @@ use gccjit::{ToLValue, ToRValue, Type};
 use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::bug;
+use rustc_middle::ty::layout::LayoutOf;
 use rustc_middle::ty::Ty;
 #[cfg(feature = "master")]
 use rustc_session::config;
@@ -184,9 +185,17 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                }
                PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => {
                    assert!(!on_stack);
-                    let ty =
-                        apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs, argument_tys.len());
-                    apply_attrs(ty, &meta_attrs, argument_tys.len())
+                    // Construct the type of a (wide) pointer to `ty`, and pass its two fields.
+                    // Any two ABI-compatible unsized types have the same metadata type and
+                    // moreover the same metadata value leads to the same dynamic size and
+                    // alignment, so this respects ABI compatibility.
+                    let ptr_ty = Ty::new_mut_ptr(cx.tcx, arg.layout.ty);
+                    let ptr_layout = cx.layout_of(ptr_ty);
+                    let typ1 = ptr_layout.scalar_pair_element_gcc_type(cx, 0);
+                    let typ2 = ptr_layout.scalar_pair_element_gcc_type(cx, 1);
+                    argument_tys.push(apply_attrs(typ1, &attrs, argument_tys.len()));
+                    argument_tys.push(apply_attrs(typ2, &meta_attrs, argument_tys.len()));
+                    continue;
                }
            };
            argument_tys.push(arg_ty);
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -115,7 +115,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
        span: &[Span],
        instance: Instance<'_>,
        dest: Option<Self::BasicBlock>,
-        _catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>,
+        _dest_catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>,
    ) {
        if options.contains(InlineAsmOptions::MAY_UNWIND) {
            self.sess().dcx().create_err(UnwindingInlineAsm { span: span[0] }).emit();
@@ -485,9 +485,8 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                        }

                        InlineAsmOperandRef::Label { label } => {
-                            let label_gcc_index = labels.iter()
-                                .position(|&l| l == label)
-                                .expect("wrong rust index");
+                            let label_gcc_index =
+                                labels.iter().position(|&l| l == label).expect("wrong rust index");
                            let gcc_index = label_gcc_index + outputs.len() + inputs.len();
                            push_to_template(Some('l'), gcc_index);
                        }
@@ -538,9 +537,8 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
        }
        if dest.is_none() && options.contains(InlineAsmOptions::NORETURN) {
            let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable");
-            let builtin_unreachable: RValue<'gcc> = unsafe {
-                std::mem::transmute(builtin_unreachable)
-            };
+            let builtin_unreachable: RValue<'gcc> =
+                unsafe { std::mem::transmute(builtin_unreachable) };
            self.call(self.type_void(), None, None, builtin_unreachable, &[], None, None);
        }

@@ -696,10 +694,12 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
 fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegClass) -> Type<'gcc> {
    match reg {
        InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::reg) => cx.type_i32(),
-        InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => unimplemented!(),
        InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg)
        | InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16) => {
-            unimplemented!()
+            cx.type_vector(cx.type_i64(), 2)
+        }
+        InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
+            unreachable!("clobber-only")
        }
        InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(),
        InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
@@ -710,21 +710,13 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
        InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg)
        | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low8)
        | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => {
-            unimplemented!()
+            cx.type_vector(cx.type_i64(), 2)
        }
-        InlineAsmRegClass::Avr(_) => unimplemented!(),
-        InlineAsmRegClass::Bpf(_) => unimplemented!(),
        InlineAsmRegClass::Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(),
        InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(),
        InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::freg) => cx.type_f32(),
-        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(),
-        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(),
-        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(),
-        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(),
-        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(),
        InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg) => cx.type_i32(),
        InlineAsmRegClass::Mips(MipsInlineAsmRegClass::freg) => cx.type_f32(),
-        InlineAsmRegClass::Msp430(_) => unimplemented!(),
        InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(),
        InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(),
        InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(),
@@ -737,26 +729,43 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
        }
        InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
        InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(),
-        InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => cx.type_f32(),
+        InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => {
+            unreachable!("clobber-only")
+        }
        InlineAsmRegClass::X86(X86InlineAsmRegClass::reg)
        | InlineAsmRegClass::X86(X86InlineAsmRegClass::reg_abcd) => cx.type_i32(),
        InlineAsmRegClass::X86(X86InlineAsmRegClass::reg_byte) => cx.type_i8(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::mmx_reg) => unimplemented!(),
        InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg)
        | InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg)
        | InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => cx.type_f32(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg) => unimplemented!(),
        InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => cx.type_i16(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => cx.type_i16(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::tmm_reg) => unimplemented!(),
-        InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
-        InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
-            bug!("LLVM backend does not support SPIR-V")
+        InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg)
+        | InlineAsmRegClass::X86(X86InlineAsmRegClass::mmx_reg)
+        | InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0)
+        | InlineAsmRegClass::X86(X86InlineAsmRegClass::tmm_reg) => {
+            unreachable!("clobber-only")
        }
+        InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
+        InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => cx.type_i64(),
+        InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::wreg) => cx.type_i32(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg) => cx.type_i8(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_upper) => cx.type_i8(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_pair) => cx.type_i16(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_iw) => cx.type_i16(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_ptr) => cx.type_i16(),
        InlineAsmRegClass::S390x(
            S390xInlineAsmRegClass::reg | S390xInlineAsmRegClass::reg_addr,
        ) => cx.type_i32(),
        InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(),
+        InlineAsmRegClass::Msp430(Msp430InlineAsmRegClass::reg) => cx.type_i16(),
+        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(),
+        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(),
+        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(),
+        InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
+            bug!("GCC backend does not support SPIR-V")
+        }
        InlineAsmRegClass::Err => unreachable!(),
    }
 }
--- a/compiler/rustc_codegen_gcc/src/attributes.rs
+++ b/compiler/rustc_codegen_gcc/src/attributes.rs
@@ -92,7 +92,7 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
    let mut function_features = function_features
        .iter()
        .flat_map(|feat| to_gcc_features(cx.tcx.sess, feat).into_iter())
-        .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
+        .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match *x {
            InstructionSetAttr::ArmA32 => "-thumb-mode", // TODO(antoyo): support removing feature.
            InstructionSetAttr::ArmT32 => "thumb-mode",
        }))
@@ -118,8 +118,8 @@ pub fn from_fn_attrs<'gcc, 'tcx>(

            if feature.starts_with('-') {
                Some(format!("no{}", feature))
-            } else if feature.starts_with('+') {
-                Some(feature[1..].to_string())
+            } else if let Some(stripped) = feature.strip_prefix('+') {
+                Some(stripped.to_string())
            } else {
                Some(feature.to_string())
            }
@@ -128,6 +128,12 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
        .join(",");
    if !target_features.is_empty() {
        #[cfg(feature = "master")]
-        func.add_attribute(FnAttribute::Target(&target_features));
+        match cx.sess().target.arch.as_ref() {
+            "x86" | "x86_64" | "powerpc" => {
+                func.add_attribute(FnAttribute::Target(&target_features))
+            }
+            // The target attribute is not supported on other targets in GCC.
+            _ => (),
+        }
    }
 }
--- a/compiler/rustc_codegen_gcc/src/back/lto.rs
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -16,13 +16,14 @@
 // /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
 // /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
 // /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
-use std::ffi::CString;
+use std::ffi::{CStr, CString};
 use std::fs::{self, File};
 use std::path::{Path, PathBuf};
+use std::sync::Arc;

-use gccjit::OutputKind;
+use gccjit::{Context, OutputKind};
 use object::read::archive::ArchiveFile;
-use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule};
+use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::symbol_export;
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
 use rustc_codegen_ssa::traits::*;
@@ -30,6 +31,7 @@ use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
 use rustc_data_structures::memmap::Mmap;
 use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_hir::def_id::LOCAL_CRATE;
+use rustc_middle::bug;
 use rustc_middle::dep_graph::WorkProduct;
 use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel};
 use rustc_session::config::{CrateType, Lto};
@@ -37,7 +39,7 @@ use tempfile::{tempdir, TempDir};

 use crate::back::write::save_temp_bitcode;
 use crate::errors::{DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib};
-use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext};
+use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext, SyncContext};

 /// We keep track of the computed LTO cache keys from the previous
 /// session to determine which CGUs we can reuse.
@@ -128,8 +130,7 @@ fn prepare_lto(
            }

            let archive_data = unsafe {
-                Mmap::map(File::open(&path).expect("couldn't open rlib"))
-                    .expect("couldn't map rlib")
+                Mmap::map(File::open(path).expect("couldn't open rlib")).expect("couldn't map rlib")
            };
            let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib");
            let obj_files = archive
@@ -349,6 +350,395 @@ impl ModuleBuffer {

 impl ModuleBufferMethods for ModuleBuffer {
    fn data(&self) -> &[u8] {
-        unimplemented!("data not needed for GCC codegen");
+        &[]
    }
 }
+
+/// Performs thin LTO by performing necessary global analysis and returning two
+/// lists, one of the modules that need optimization and another for modules that
+/// can simply be copied over from the incr. comp. cache.
+pub(crate) fn run_thin(
+    cgcx: &CodegenContext<GccCodegenBackend>,
+    modules: Vec<(String, ThinBuffer)>,
+    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
+    let dcx = cgcx.create_dcx();
+    let dcx = dcx.handle();
+    let lto_data = prepare_lto(cgcx, dcx)?;
+    /*let symbols_below_threshold =
+    symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/
+    if cgcx.opts.cg.linker_plugin_lto.enabled() {
+        unreachable!(
+            "We should never reach this case if the LTO step \
+                      is deferred to the linker"
+        );
+    }
+    thin_lto(
+        cgcx,
+        dcx,
+        modules,
+        lto_data.upstream_modules,
+        lto_data.tmp_path,
+        cached_modules, /*, &symbols_below_threshold*/
+    )
+}
+
+pub(crate) fn prepare_thin(
+    module: ModuleCodegen<GccContext>,
+    _emit_summary: bool,
+) -> (String, ThinBuffer) {
+    let name = module.name;
+    //let buffer = ThinBuffer::new(module.module_llvm.context, true, emit_summary);
+    let buffer = ThinBuffer::new(&module.module_llvm.context);
+    (name, buffer)
+}
+
+/// Prepare "thin" LTO to get run on these modules.
+///
+/// The general structure of ThinLTO is quite different from the structure of
+/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
+/// one giant LLVM module, and then we run more optimization passes over this
+/// big module after internalizing most symbols. Thin LTO, on the other hand,
+/// avoids this large bottleneck through more targeted optimization.
+///
+/// At a high level Thin LTO looks like:
+///
+///    1. Prepare a "summary" of each LLVM module in question which describes
+///       the values inside, cost of the values, etc.
+///    2. Merge the summaries of all modules in question into one "index"
+///    3. Perform some global analysis on this index
+///    4. For each module, use the index and analysis calculated previously to
+///       perform local transformations on the module, for example inlining
+///       small functions from other modules.
+///    5. Run thin-specific optimization passes over each module, and then code
+///       generate everything at the end.
+///
+/// The summary for each module is intended to be quite cheap, and the global
+/// index is relatively cheap to create as well. As a result, the goal of
+/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
+/// situations. For example one cheap optimization is that we can parallelize
+/// all codegen modules, easily making use of all the cores on a machine.
+///
+/// With all that in mind, the function here is designed specifically for just
+/// calculating the *index* for ThinLTO. This index will then be shared amongst
+/// all of the `LtoModuleCodegen` units returned below and destroyed once
+/// they all go out of scope.
+fn thin_lto(
+    cgcx: &CodegenContext<GccCodegenBackend>,
+    _dcx: DiagCtxtHandle<'_>,
+    modules: Vec<(String, ThinBuffer)>,
+    serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
+    tmp_path: TempDir,
+    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+    //symbols_below_threshold: &[*const libc::c_char],
+) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
+    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
+    info!("going for that thin, thin LTO");
+
+    /*let green_modules: FxHashMap<_, _> =
+    cached_modules.iter().map(|(_, wp)| (wp.cgu_name.clone(), wp.clone())).collect();*/
+
+    let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
+    let mut thin_buffers = Vec::with_capacity(modules.len());
+    let mut module_names = Vec::with_capacity(full_scope_len);
+    //let mut thin_modules = Vec::with_capacity(full_scope_len);
+
+    for (i, (name, buffer)) in modules.into_iter().enumerate() {
+        info!("local module: {} - {}", i, name);
+        let cname = CString::new(name.as_bytes()).unwrap();
+        /*thin_modules.push(llvm::ThinLTOModule {
+            identifier: cname.as_ptr(),
+            data: buffer.data().as_ptr(),
+            len: buffer.data().len(),
+        });*/
+        thin_buffers.push(buffer);
+        module_names.push(cname);
+    }
+
+    // FIXME: All upstream crates are deserialized internally in the
+    //        function below to extract their summary and modules. Note that
+    //        unlike the loop above we *must* decode and/or read something
+    //        here as these are all just serialized files on disk. An
+    //        improvement, however, to make here would be to store the
+    //        module summary separately from the actual module itself. Right
+    //        now this is stored in one large bitcode file, and the entire
+    //        file is deflate-compressed. We could try to bypass some of the
+    //        decompression by storing the index uncompressed and only
+    //        lazily decompressing the bytecode if necessary.
+    //
+    //        Note that truly taking advantage of this optimization will
+    //        likely be further down the road. We'd have to implement
+    //        incremental ThinLTO first where we could actually avoid
+    //        looking at upstream modules entirely sometimes (the contents,
+    //        we must always unconditionally look at the index).
+    let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
+
+    let cached_modules =
+        cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
+
+    for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
+        info!("upstream or cached module {:?}", name);
+        /*thin_modules.push(llvm::ThinLTOModule {
+            identifier: name.as_ptr(),
+            data: module.data().as_ptr(),
+            len: module.data().len(),
+        });*/
+
+        match module {
+            SerializedModule::Local(_) => {
+                //let path = module_buffer.0.to_str().expect("path");
+                //let my_path = PathBuf::from(path);
+                //let exists = my_path.exists();
+                /*module.module_llvm.should_combine_object_files = true;
+                module
+                .module_llvm
+                .context
+                .add_driver_option(module_buffer.0.to_str().expect("path"));*/
+            }
+            SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
+            SerializedModule::FromUncompressedFile(_) => {
+                unimplemented!("from uncompressed file")
+            }
+        }
+
+        serialized.push(module);
+        module_names.push(name);
+    }
+
+    // Sanity check
+    //assert_eq!(thin_modules.len(), module_names.len());
+
+    // Delegate to the C++ bindings to create some data here. Once this is a
+    // tried-and-true interface we may wish to try to upstream some of this
+    // to LLVM itself, right now we reimplement a lot of what they do
+    // upstream...
+    /*let data = llvm::LLVMRustCreateThinLTOData(
+        thin_modules.as_ptr(),
+        thin_modules.len() as u32,
+        symbols_below_threshold.as_ptr(),
+        symbols_below_threshold.len() as u32,
+    )
+    .ok_or_else(|| write::llvm_err(dcx, LlvmError::PrepareThinLtoContext))?;
+    */
+
+    let data = ThinData; //(Arc::new(tmp_path))/*(data)*/;
+
+    info!("thin LTO data created");
+
+    /*let (key_map_path, prev_key_map, curr_key_map) =
+        if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
+            let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
+            // If the previous file was deleted, or we get an IO error
+            // reading the file, then we'll just use `None` as the
+            // prev_key_map, which will force the code to be recompiled.
+            let prev =
+                if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
+            let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
+            (Some(path), prev, curr)
+        }
+        else {
+            // If we don't compile incrementally, we don't need to load the
+            // import data from LLVM.
+            assert!(green_modules.is_empty());
+            let curr = ThinLTOKeysMap::default();
+            (None, None, curr)
+        };
+    info!("thin LTO cache key map loaded");
+    info!("prev_key_map: {:#?}", prev_key_map);
+    info!("curr_key_map: {:#?}", curr_key_map);*/
+
+    // Throw our data in an `Arc` as we'll be sharing it across threads. We
+    // also put all memory referenced by the C++ data (buffers, ids, etc)
+    // into the arc as well. After this we'll create a thin module
+    // codegen per module in this data.
+    let shared =
+        Arc::new(ThinShared { data, thin_buffers, serialized_modules: serialized, module_names });
+
+    let copy_jobs = vec![];
+    let mut opt_jobs = vec![];
+
+    info!("checking which modules can be-reused and which have to be re-optimized.");
+    for (module_index, module_name) in shared.module_names.iter().enumerate() {
+        let module_name = module_name_to_str(module_name);
+        /*if let (Some(prev_key_map), true) =
+            (prev_key_map.as_ref(), green_modules.contains_key(module_name))
+        {
+            assert!(cgcx.incr_comp_session_dir.is_some());
+
+            // If a module exists in both the current and the previous session,
+            // and has the same LTO cache key in both sessions, then we can re-use it
+            if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
+                let work_product = green_modules[module_name].clone();
+                copy_jobs.push(work_product);
+                info!(" - {}: re-used", module_name);
+                assert!(cgcx.incr_comp_session_dir.is_some());
+                continue;
+            }
+        }*/
+
+        info!(" - {}: re-compiled", module_name);
+        opt_jobs
+            .push(LtoModuleCodegen::Thin(ThinModule { shared: shared.clone(), idx: module_index }));
+    }
+
+    // Save the current ThinLTO import information for the next compilation
+    // session, overwriting the previous serialized data (if any).
+    /*if let Some(path) = key_map_path {
+        if let Err(err) = curr_key_map.save_to_file(&path) {
+            return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err }));
+        }
+    }*/
+
+    // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead
+    // of now.
+    //module.module_llvm.temp_dir = Some(tmp_path);
+    // TODO: save the directory so that it gets deleted later.
+    std::mem::forget(tmp_path);
+
+    Ok((opt_jobs, copy_jobs))
+}
+
+pub unsafe fn optimize_thin_module(
+    thin_module: ThinModule<GccCodegenBackend>,
+    _cgcx: &CodegenContext<GccCodegenBackend>,
+) -> Result<ModuleCodegen<GccContext>, FatalError> {
+    //let dcx = cgcx.create_dcx();
+
+    //let module_name = &thin_module.shared.module_names[thin_module.idx];
+    /*let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
+    let tm = (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&dcx, e))?;*/
+
+    // Right now the implementation we've got only works over serialized
+    // modules, so we create a fresh new LLVM context and parse the module
+    // into that context. One day, however, we may do this for upstream
+    // crates but for locally codegened modules we may be able to reuse
+    // that LLVM Context and Module.
+    //let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
+    //let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &dcx)? as *const _;
+    let mut should_combine_object_files = false;
+    let context = match thin_module.shared.thin_buffers.get(thin_module.idx) {
+        Some(thin_buffer) => Arc::clone(&thin_buffer.context),
+        None => {
+            let context = Context::default();
+            let len = thin_module.shared.thin_buffers.len();
+            let module = &thin_module.shared.serialized_modules[thin_module.idx - len];
+            match *module {
+                SerializedModule::Local(ref module_buffer) => {
+                    let path = module_buffer.0.to_str().expect("path");
+                    context.add_driver_option(path);
+                    should_combine_object_files = true;
+                    /*module.module_llvm.should_combine_object_files = true;
+                    module
+                        .module_llvm
+                        .context
+                        .add_driver_option(module_buffer.0.to_str().expect("path"));*/
+                }
+                SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
+                SerializedModule::FromUncompressedFile(_) => {
+                    unimplemented!("from uncompressed file")
+                }
+            }
+            Arc::new(SyncContext::new(context))
+        }
+    };
+    let module = ModuleCodegen {
+        module_llvm: GccContext { context, should_combine_object_files, temp_dir: None },
+        name: thin_module.name().to_string(),
+        kind: ModuleKind::Regular,
+    };
+    /*{
+        let target = &*module.module_llvm.tm;
+        let llmod = module.module_llvm.llmod();
+        save_temp_bitcode(cgcx, &module, "thin-lto-input");
+
+        // Up next comes the per-module local analyses that we do for Thin LTO.
+        // Each of these functions is basically copied from the LLVM
+        // implementation and then tailored to suit this implementation. Ideally
+        // each of these would be supported by upstream LLVM but that's perhaps
+        // a patch for another day!
+        //
+        // You can find some more comments about these functions in the LLVM
+        // bindings we've got (currently `PassWrapper.cpp`)
+        {
+            let _timer =
+                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
+        }
+
+        {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
+        }
+
+        {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
+        }
+
+        {
+            let _timer =
+                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
+        }
+
+        // Alright now that we've done everything related to the ThinLTO
+        // analysis it's time to run some optimizations! Here we use the same
+        // `run_pass_manager` as the "fat" LTO above except that we tell it to
+        // populate a thin-specific pass manager, which presumably LLVM treats a
+        // little differently.
+        {
+            info!("running thin lto passes over {}", module.name);
+            run_pass_manager(cgcx, &dcx, &mut module, true)?;
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
+        }
+    }*/
+    Ok(module)
+}
+
+pub struct ThinBuffer {
+    context: Arc<SyncContext>,
+}
+
+// TODO: check whether it makes sense to make ThinBuffer Send and Sync.
+unsafe impl Send for ThinBuffer {}
+unsafe impl Sync for ThinBuffer {}
+
+impl ThinBuffer {
+    pub(crate) fn new(context: &Arc<SyncContext>) -> Self {
+        Self { context: Arc::clone(context) }
+    }
+}
+
+impl ThinBufferMethods for ThinBuffer {
+    fn data(&self) -> &[u8] {
+        &[]
+    }
+
+    fn thin_link_data(&self) -> &[u8] {
+        unimplemented!();
+    }
+}
+
+pub struct ThinData; //(Arc<TempDir>);
+
+fn module_name_to_str(c_str: &CStr) -> &str {
+    c_str.to_str().unwrap_or_else(|e| {
+        bug!("Encountered non-utf8 GCC module name `{}`: {}", c_str.to_string_lossy(), e)
+    })
+}
--- a/compiler/rustc_codegen_gcc/src/back/write.rs
+++ b/compiler/rustc_codegen_gcc/src/back/write.rs
@@ -31,6 +31,7 @@ pub(crate) unsafe fn codegen(

        // NOTE: Only generate object files with GIMPLE when this environment variable is set for
        // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit).
+        // TODO: remove this environment variable.
        let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1");

        let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
@ -56,6 +57,8 @@ pub(crate) unsafe fn codegen(
                    .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name);
                context.add_command_line_option("-flto=auto");
                context.add_command_line_option("-flto-partition=one");
+                // TODO: remove this option, since we don't want fat objects when the output is bitcode only.
+                context.add_command_line_option("-ffat-lto-objects");
                context
                    .compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
            }
@ -104,7 +107,7 @@ pub(crate) unsafe fn codegen(
                    // FIXME(antoyo): segfault in dump_reproducer_to_file() might be caused by
                    // transmuting an rvalue to an lvalue.
                    // Segfault is actually in gcc::jit::reproducer::get_identifier_as_lvalue
-                    context.dump_reproducer_to_file(&format!("/tmp/reproducers/{}.c", module.name));
+                    context.dump_reproducer_to_file(format!("/tmp/reproducers/{}.c", module.name));
                    println!("Dumped reproducer {}", module.name);
                }
                if env::var("CG_GCCJIT_DUMP_TO_FILE").as_deref() == Ok("1") {
@ -113,17 +116,20 @@ pub(crate) unsafe fn codegen(
                    context.set_debug_info(true);
                    context.dump_to_file(path, true);
                }
-                if should_combine_object_files && fat_lto {
-                    context.add_command_line_option("-flto=auto");
-                    context.add_command_line_option("-flto-partition=one");
+                if should_combine_object_files {
+                    if fat_lto {
+                        context.add_command_line_option("-flto=auto");
+                        context.add_command_line_option("-flto-partition=one");
+
+                        // NOTE: without -fuse-linker-plugin, we get the following error:
+                        // lto1: internal compiler error: decompressed stream: Destination buffer is too small
+                        context.add_driver_option("-fuse-linker-plugin");
+                    }

                    context.add_driver_option("-Wl,-r");
                    // NOTE: we need -nostdlib, otherwise, we get the following error:
                    // /usr/bin/ld: cannot find -lgcc_s: No such file or directory
                    context.add_driver_option("-nostdlib");
-                    // NOTE: without -fuse-linker-plugin, we get the following error:
-                    // lto1: internal compiler error: decompressed stream: Destination buffer is too small
-                    context.add_driver_option("-fuse-linker-plugin");

                    // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files together in another .o.
                    context.compile_to_file(
--- a/compiler/rustc_codegen_gcc/src/base.rs
+++ b/compiler/rustc_codegen_gcc/src/base.rs
@ -1,8 +1,9 @@
 use std::collections::HashSet;
 use std::env;
+use std::sync::Arc;
 use std::time::Instant;

-use gccjit::{FunctionType, GlobalKind};
+use gccjit::{CType, FunctionType, GlobalKind};
 use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
 use rustc_codegen_ssa::mono_item::MonoItemExt;
 use rustc_codegen_ssa::traits::DebugInfoMethods;
@ -18,8 +19,8 @@ use rustc_target::spec::PanicStrategy;

 use crate::builder::Builder;
 use crate::context::CodegenCx;
-use crate::GccContext;
 use crate::{gcc_util, new_context, LockedTargetInfo};
+use crate::{GccContext, SyncContext};

 #[cfg(feature = "master")]
 pub fn visibility_to_gcc(linkage: Visibility) -> gccjit::Visibility {
@ -135,7 +136,7 @@ pub fn compile_codegen_unit(

        let target_cpu = gcc_util::target_cpu(tcx.sess);
        if target_cpu != "generic" {
-            context.add_command_line_option(&format!("-march={}", target_cpu));
+            context.add_command_line_option(format!("-march={}", target_cpu));
        }

        if tcx
@ -181,7 +182,24 @@ pub fn compile_codegen_unit(
        context.set_allow_unreachable_blocks(true);

        {
-            let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());
+            // TODO: make this less error-prone: calling get_target_info() adds the flag
+            // -fsyntax-only, so compilation should be forbidden on a context on which
+            // get_target_info() has been called.
+            let f16_type_supported = target_info.supports_target_dependent_type(CType::Float16);
+            let f32_type_supported = target_info.supports_target_dependent_type(CType::Float32);
+            let f64_type_supported = target_info.supports_target_dependent_type(CType::Float64);
+            let f128_type_supported = target_info.supports_target_dependent_type(CType::Float128);
+            // TODO: improve this to avoid passing that many arguments.
+            let cx = CodegenCx::new(
+                &context,
+                cgu,
+                tcx,
+                target_info.supports_128bit_int(),
+                f16_type_supported,
+                f32_type_supported,
+                f64_type_supported,
+                f128_type_supported,
+            );

            let mono_items = cgu.items_in_deterministic_order(tcx);
            for &(mono_item, data) in &mono_items {
@ -205,7 +223,11 @@ pub fn compile_codegen_unit(

        ModuleCodegen {
            name: cgu_name.to_string(),
-            module_llvm: GccContext { context, should_combine_object_files: false, temp_dir: None },
+            module_llvm: GccContext {
+                context: Arc::new(SyncContext::new(context)),
+                should_combine_object_files: false,
+                temp_dir: None,
+            },
            kind: ModuleKind::Regular,
        }
    }
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@ -25,7 +25,7 @@ use rustc_middle::ty::layout::{
    FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasParamEnv, HasTyCtxt, LayoutError, LayoutOfHelpers,
    TyAndLayout,
 };
-use rustc_middle::ty::{ParamEnv, Ty, TyCtxt, Instance};
+use rustc_middle::ty::{Instance, ParamEnv, Ty, TyCtxt};
 use rustc_span::def_id::DefId;
 use rustc_span::Span;
 use rustc_target::abi::{
@ -68,7 +68,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        src: RValue<'gcc>,
        order: AtomicOrdering,
    ) -> RValue<'gcc> {
-        let size = src.get_type().get_size();
+        let size = get_maybe_pointer_size(src);

        let func = self.current_func();

@ -138,7 +138,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        failure_order: AtomicOrdering,
        weak: bool,
    ) -> RValue<'gcc> {
-        let size = src.get_type().get_size();
+        let size = get_maybe_pointer_size(src);
        let compare_exchange =
            self.context.get_builtin_function(&format!("__atomic_compare_exchange_{}", size));
        let order = self.context.new_rvalue_from_int(self.i32_type, order.to_gcc());
@ -153,7 +153,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

        // NOTE: not sure why, but we have the wrong type here.
        let int_type = compare_exchange.get_param(2).to_rvalue().get_type();
-        let src = self.context.new_cast(self.location, src, int_type);
+        let src = self.context.new_bitcast(self.location, src, int_type);
        self.context.new_call(
            self.location,
            compare_exchange,
@ -190,8 +190,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        let casted_args: Vec<_> = param_types
            .into_iter()
            .zip(args.iter())
-            .enumerate()
-            .map(|(_i, (expected_ty, &actual_val))| {
+            .map(|(expected_ty, &actual_val)| {
                let actual_ty = actual_val.get_type();
                if expected_ty != actual_ty {
                    self.bitcast(actual_val, expected_ty)
@ -225,7 +224,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

        let mut on_stack_param_indices = FxHashSet::default();
        if let Some(indices) = self.on_stack_params.borrow().get(&gcc_func) {
-            on_stack_param_indices = indices.clone();
+            on_stack_param_indices.clone_from(indices);
        }

        if all_args_match {
@ -253,11 +252,26 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                    {
                        self.context.new_cast(self.location, actual_val, expected_ty)
                    } else if on_stack_param_indices.contains(&index) {
-                        actual_val.dereference(self.location).to_rvalue()
+                        let ty = actual_val.get_type();
+                        // It's possible that the value behind the pointer is not exactly
+                        // the expected type, so to work around that, we cast the pointer
+                        // before dereferencing it.
+                        if let Some(pointee_val) = ty.get_pointee()
+                            && pointee_val != expected_ty
+                        {
+                            let new_val = self.context.new_cast(
+                                self.location,
+                                actual_val,
+                                expected_ty.make_pointer(),
+                            );
+                            new_val.dereference(self.location).to_rvalue()
+                        } else {
+                            actual_val.dereference(self.location).to_rvalue()
+                        }
                    } else {
                        assert!(
-                            !((actual_ty.is_vector() && !expected_ty.is_vector())
-                                || (!actual_ty.is_vector() && expected_ty.is_vector())),
+                            (!expected_ty.is_vector() || actual_ty.is_vector())
+                                && (expected_ty.is_vector() || !actual_ty.is_vector()),
                            "{:?} ({}) -> {:?} ({}), index: {:?}[{}]",
                            actual_ty,
                            actual_ty.is_vector(),
@ -277,8 +291,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            .collect();

        // NOTE: to take into account variadic functions.
-        for i in casted_args.len()..args.len() {
-            casted_args.push(args[i]);
+        for arg in args.iter().skip(casted_args.len()) {
+            casted_args.push(*arg);
        }

        Cow::Owned(casted_args)
@ -353,7 +367,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            let function_address_names = self.function_address_names.borrow();
            let original_function_name = function_address_names.get(&func_ptr);
            llvm::adjust_intrinsic_arguments(
-                &self,
+                self,
                gcc_func,
                args.into(),
                &func_name,
@ -361,7 +375,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            )
        };
        let args_adjusted = args.len() != previous_arg_count;
-        let args = self.check_ptr_call("call", func_ptr, &*args);
+        let args = self.check_ptr_call("call", func_ptr, &args);

        // gccjit requires to use the result of functions, even when it's not used.
        // That's why we assign the result to a local or call add_eval().
@ -373,7 +387,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            unsafe { RETURN_VALUE_COUNT += 1 };
            let return_value = self.cx.context.new_call_through_ptr(self.location, func_ptr, &args);
            let return_value = llvm::adjust_intrinsic_return_value(
-                &self,
+                self,
                return_value,
                &func_name,
                &args,
@ -441,7 +455,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        self.block.add_assignment(
            self.location,
            result,
-            self.cx.context.new_call(self.location, func, &args),
+            self.cx.context.new_call(self.location, func, args),
        );
        result.to_rvalue()
    }
@ -596,7 +610,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
    ) -> RValue<'gcc> {
        let try_block = self.current_func().new_block("try");

-        let current_block = self.block.clone();
+        let current_block = self.block;
        self.block = try_block;
        let call = self.call(typ, fn_attrs, None, func, args, None, instance); // TODO(antoyo): use funclet here?
        self.block = current_block;
@ -630,8 +644,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        then: Block<'gcc>,
        catch: Block<'gcc>,
        _funclet: Option<&Funclet>,
+        instance: Option<Instance<'tcx>>,
    ) -> RValue<'gcc> {
-        let call_site = self.call(typ, fn_attrs, None, func, args, None);
+        let call_site = self.call(typ, fn_attrs, None, func, args, None, instance);
        let condition = self.context.new_rvalue_from_int(self.bool_type, 1);
        self.llbb().end_with_conditional(self.location, condition, then, catch);
        if let Some(_fn_abi) = fn_abi {
@ -749,6 +764,24 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
            // FIXME(antoyo): this seems to produce the wrong result.
            return self.context.new_call(self.location, fmodf, &[a, b]);
        }
+
+        #[cfg(feature = "master")]
+        match self.cx.type_kind(a_type) {
+            TypeKind::Half | TypeKind::Float => {
+                let fmodf = self.context.get_builtin_function("fmodf");
+                return self.context.new_call(self.location, fmodf, &[a, b]);
+            }
+            TypeKind::Double => {
+                let fmod = self.context.get_builtin_function("fmod");
+                return self.context.new_call(self.location, fmod, &[a, b]);
+            }
+            TypeKind::FP128 => {
+                let fmodl = self.context.get_builtin_function("fmodl");
+                return self.context.new_call(self.location, fmodl, &[a, b]);
+            }
+            _ => (),
+        }
+
        if let Some(vector_type) = a_type_unqualified.dyncast_vector() {
            assert_eq!(a_type_unqualified, b.get_type().unqualified());

@ -903,11 +936,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        // TODO(antoyo): It might be better to return a LValue, but fixing the rustc API is non-trivial.
        self.stack_var_count.set(self.stack_var_count.get() + 1);
        self.current_func()
-            .new_local(
-                self.location,
-                ty,
-                &format!("stack_var_{}", self.stack_var_count.get()),
-            )
+            .new_local(self.location, ty, &format!("stack_var_{}", self.stack_var_count.get()))
            .get_address(self.location)
    }

@ -993,7 +1022,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
            }
        }

-        let val = if let Some(_) = place.val.llextra {
+        let val = if place.val.llextra.is_some() {
            // FIXME: Merge with the `else` below?
            OperandValue::Ref(place.val)
        } else if place.layout.is_gcc_immediate() {
@ -1125,7 +1154,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        // the following cast is required to avoid this error:
        // gcc_jit_context_new_call: mismatching types for argument 2 of function "__atomic_store_4": assignment to param arg1 (type: int) from loadedValue3577 (type: unsigned int  __attribute__((aligned(4))))
        let int_type = atomic_store.get_param(1).to_rvalue().get_type();
-        let value = self.context.new_cast(self.location, value, int_type);
+        let value = self.context.new_bitcast(self.location, value, int_type);
        self.llbb().add_eval(
            self.location,
            self.context.new_call(self.location, atomic_store, &[ptr, value, ordering]),
@ -1172,7 +1201,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        // NOTE: due to opaque pointers now being used, we need to cast here.
        let ptr = self.context.new_cast(self.location, ptr, typ.make_pointer());
        // NOTE: array indexing is always considered in bounds in GCC (TODO(antoyo): to be verified).
-        let mut indices = indices.into_iter();
+        let mut indices = indices.iter();
        let index = indices.next().expect("first index in inbounds_gep");
        let mut result = self.context.new_array_access(self.location, ptr, *index);
        for index in indices {
@ -1589,7 +1618,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        src: RValue<'gcc>,
        order: AtomicOrdering,
    ) -> RValue<'gcc> {
-        let size = src.get_type().get_size();
+        let size = get_maybe_pointer_size(src);
        let name = match op {
            AtomicRmwBinOp::AtomicXchg => format!("__atomic_exchange_{}", size),
            AtomicRmwBinOp::AtomicAdd => format!("__atomic_fetch_add_{}", size),
@ -1620,7 +1649,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        let dst = self.context.new_cast(self.location, dst, volatile_void_ptr_type);
        // FIXME(antoyo): not sure why, but we have the wrong type here.
        let new_src_type = atomic_function.get_param(1).to_rvalue().get_type();
-        let src = self.context.new_cast(self.location, src, new_src_type);
+        let src = self.context.new_bitcast(self.location, src, new_src_type);
        let res = self.context.new_call(self.location, atomic_function, &[dst, src, order]);
        self.context.new_cast(self.location, res, src.get_type())
    }
@ -1661,7 +1690,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
        _instance: Option<Instance<'tcx>>,
    ) -> RValue<'gcc> {
        // FIXME(antoyo): remove when having a proper API.
-        let gcc_func = unsafe { std::mem::transmute(func) };
+        let gcc_func = unsafe { std::mem::transmute::<RValue<'gcc>, Function<'gcc>>(func) };
        let call = if self.functions.borrow().values().any(|value| *value == gcc_func) {
            self.function_call(func, args, funclet)
        } else {
@ -1676,11 +1705,6 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {

    fn zext(&mut self, value: RValue<'gcc>, dest_typ: Type<'gcc>) -> RValue<'gcc> {
        // FIXME(antoyo): this does not zero-extend.
-        if value.get_type().is_bool() && dest_typ.is_i8(&self.cx) {
-            // FIXME(antoyo): hack because base::from_immediate converts i1 to i8.
-            // Fix the code in codegen_ssa::base::from_immediate.
-            return value;
-        }
        self.gcc_int_cast(value, dest_typ)
    }

@ -2049,7 +2073,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                self.context.new_rvalue_from_vector(self.location, mask_type, &vector_elements);
            let shifted = self.context.new_rvalue_vector_perm(self.location, res, res, mask);
            shift *= 2;
-            res = op(res, shifted, &self.context);
+            res = op(res, shifted, self.context);
        }
        self.context
            .new_vector_access(self.location, res, self.context.new_rvalue_zero(self.int_type))
@ -2065,7 +2089,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
    }

    pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> {
-        let loc = self.location.clone();
+        let loc = self.location;
        self.vector_reduce(src, |a, b, context| context.new_binary_op(loc, op, a.get_type(), a, b))
    }

@ -2082,7 +2106,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
        let element_count = vector_type.get_num_units();
        (0..element_count)
-            .into_iter()
            .map(|i| {
                self.context
                    .new_vector_access(
@ -2113,7 +2136,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
        let element_count = vector_type.get_num_units();
        (0..element_count)
-            .into_iter()
            .map(|i| {
                self.context
                    .new_vector_access(
@ -2133,7 +2155,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

    // Inspired by Hacker's Delight min implementation.
    pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
-        let loc = self.location.clone();
+        let loc = self.location;
        self.vector_reduce(src, |a, b, context| {
            let differences_or_zeros = difference_or_zero(loc, a, b, context);
            context.new_binary_op(loc, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
@ -2142,7 +2164,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

    // Inspired by Hacker's Delight max implementation.
    pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
-        let loc = self.location.clone();
+        let loc = self.location;
        self.vector_reduce(src, |a, b, context| {
            let differences_or_zeros = difference_or_zero(loc, a, b, context);
            context.new_binary_op(loc, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
@ -2337,7 +2359,13 @@ impl<'tcx> HasParamEnv<'tcx> for Builder<'_, '_, 'tcx> {

 impl<'tcx> HasTargetSpec for Builder<'_, '_, 'tcx> {
    fn target_spec(&self) -> &Target {
-        &self.cx.target_spec()
+        self.cx.target_spec()
+    }
+}
+
+impl<'tcx> HasWasmCAbiOpt for Builder<'_, '_, 'tcx> {
+    /// Delegates to the codegen context: returns `self.cx.wasm_c_abi_opt()`.
+    fn wasm_c_abi_opt(&self) -> WasmCAbi {
+        self.cx.wasm_c_abi_opt()
     }
 }

@ -2422,3 +2450,19 @@ impl ToGccOrdering for AtomicOrdering {
        ordering as i32
    }
 }
+
+// Needed because gcc 12 `get_size()` doesn't work on pointers.
+//
+// Returns the size of `value`'s type; the callers visible in this file use it to pick the
+// matching `__atomic_*_{size}` builtin. With the `master` feature the size is read straight
+// from the value's type.
+#[cfg(feature = "master")]
+fn get_maybe_pointer_size(value: RValue<'_>) -> u32 {
+    value.get_type().get_size()
+}
+
+// Variant for builds without the `master` feature: a type that has a pointee is never passed
+// to `get_size()`; the size of a raw pointer is returned instead.
+// NOTE(review): `size_of::<*const ()>()` is the pointer size of the platform this backend
+// itself is compiled for, which may differ from the compilation target's pointer size when
+// cross-compiling -- confirm.
+#[cfg(not(feature = "master"))]
+fn get_maybe_pointer_size(value: RValue<'_>) -> u32 {
+    let type_ = value.get_type();
+    if type_.get_pointee().is_some() {
+        std::mem::size_of::<*const ()>() as _
+    } else {
+        type_.get_size()
+    }
+}
--- a/compiler/rustc_codegen_gcc/src/callee.rs
+++ b/compiler/rustc_codegen_gcc/src/callee.rs
@ -28,7 +28,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)

    let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());

-    let func = if let Some(_func) = cx.get_declared_value(&sym) {
+    let func = if let Some(_func) = cx.get_declared_value(sym) {
        // FIXME(antoyo): we never reach this because get_declared_value only returns global variables
        // and here we try to get a function.
        unreachable!();
@ -68,7 +68,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
        }*/
    } else {
        cx.linkage.set(FunctionType::Extern);
-        let func = cx.declare_fn(&sym, &fn_abi);
+        let func = cx.declare_fn(sym, fn_abi);

        attributes::from_fn_attrs(cx, func, instance);

--- a/compiler/rustc_codegen_gcc/src/common.rs
+++ b/compiler/rustc_codegen_gcc/src/common.rs
@ -21,7 +21,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {

    fn global_string(&self, string: &str) -> LValue<'gcc> {
        // TODO(antoyo): handle non-null-terminated strings.
-        let string = self.context.new_string_literal(&*string);
+        let string = self.context.new_string_literal(string);
        let sym = self.generate_local_symbol_name("str");
        let global = self.declare_private_global(&sym, self.val_ty(string));
        global.global_set_initializer_rvalue(string);
@ -187,7 +187,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
                    return self
                        .context
                        .new_rvalue_from_double(ty, f32::from_bits(data as u32) as f64);
-                } else if ty == self.double_type {
+                }
+                if ty == self.double_type {
                    return self.context.new_rvalue_from_double(ty, f64::from_bits(data as u64));
                }

@ -297,7 +298,7 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
        } else if self.is_ulonglong(cx) {
            cx.longlong_type
        } else {
-            self.clone()
+            *self
        }
    }

@ -323,7 +324,7 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
        } else if self.is_longlong(cx) {
            cx.ulonglong_type
        } else {
-            self.clone()
+            *self
        }
    }
 }
@ -436,7 +437,7 @@ impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> {
    }

    fn is_vector(&self) -> bool {
-        let mut typ = self.clone();
+        let mut typ = *self;
        loop {
            if typ.dyncast_vector().is_some() {
                return true;
--- a/compiler/rustc_codegen_gcc/src/consts.rs
+++ b/compiler/rustc_codegen_gcc/src/consts.rs
@ -1,15 +1,16 @@
 #[cfg(feature = "master")]
 use gccjit::{FnAttribute, VarAttribute, Visibility};
-use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue};
-use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods};
+use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue, Type};
+use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, StaticMethods};
+use rustc_hir::def::DefKind;
+use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
 use rustc_middle::mir::interpret::{
    self, read_target_uint, ConstAllocation, ErrorHandled, Scalar as InterpScalar,
 };
-use rustc_middle::mir::mono::MonoItem;
 use rustc_middle::span_bug;
 use rustc_middle::ty::layout::LayoutOf;
-use rustc_middle::ty::{self, Instance, Ty};
+use rustc_middle::ty::{self, Instance};
 use rustc_span::def_id::DefId;
 use rustc_target::abi::{self, Align, HasDataLayout, Primitive, Size, WrappingRange};

@ -63,16 +64,15 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
        global_value
    }

+    #[cfg_attr(not(feature = "master"), allow(unused_mut))]
    fn codegen_static(&self, def_id: DefId) {
        let attrs = self.tcx.codegen_fn_attrs(def_id);

-        let value = match codegen_static_initializer(&self, def_id) {
-            Ok((value, _)) => value,
+        let Ok((value, alloc)) = codegen_static_initializer(self, def_id) else {
            // Error has already been reported
-            Err(_) => return,
+            return;
        };
-
-        let global = self.get_static(def_id);
+        let alloc = alloc.inner();

        // boolean SSA values are i1, but they have to be stored in i8 slots,
        // otherwise some LLVM optimization passes don't work as expected
@ -81,23 +81,25 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
            unimplemented!();
        };

-        let instance = Instance::mono(self.tcx, def_id);
-        let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all());
-        let gcc_type = self.layout_of(ty).gcc_type(self);
+        let is_thread_local = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL);
+        let global = self.get_static_inner(def_id, val_llty);

-        set_global_alignment(self, global, self.align_of(ty));
+        #[cfg(feature = "master")]
+        if global.to_rvalue().get_type() != val_llty {
+            global.to_rvalue().set_type(val_llty);
+        }
+        set_global_alignment(self, global, alloc.align);

-        let value = self.bitcast_if_needed(value, gcc_type);
        global.global_set_initializer_rvalue(value);

        // As an optimization, all shared statics which do not have interior
        // mutability are placed into read-only memory.
-        if !self.tcx.static_mutability(def_id).unwrap().is_mut() && self.type_is_freeze(ty) {
+        if alloc.mutability.is_not() {
            #[cfg(feature = "master")]
            global.global_set_readonly();
        }

-        if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) {
+        if is_thread_local {
            // Do not allow LLVM to change the alignment of a TLS on macOS.
            //
            // By default a global's alignment can be freely increased.
@ -205,35 +207,49 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {

    pub fn get_static(&self, def_id: DefId) -> LValue<'gcc> {
        let instance = Instance::mono(self.tcx, def_id);
-        let fn_attrs = self.tcx.codegen_fn_attrs(def_id);
+        let DefKind::Static { nested, .. } = self.tcx.def_kind(def_id) else { bug!() };
+        // Nested statics do not have a type, so pick an arbitrary type and let `define_static`
+        // figure out the gcc type from the actual evaluated initializer.
+        let gcc_type = if nested {
+            self.type_i8()
+        } else {
+            let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all());
+            self.layout_of(ty).gcc_type(self)
+        };
+
+        self.get_static_inner(def_id, gcc_type)
+    }
+
+    pub(crate) fn get_static_inner(&self, def_id: DefId, gcc_type: Type<'gcc>) -> LValue<'gcc> {
+        let instance = Instance::mono(self.tcx, def_id);
        if let Some(&global) = self.instances.borrow().get(&instance) {
+            trace!("used cached value");
            return global;
        }

-        let defined_in_current_codegen_unit =
-            self.codegen_unit.items().contains_key(&MonoItem::Static(def_id));
-        assert!(
-            !defined_in_current_codegen_unit,
-            "consts::get_static() should always hit the cache for \
-                 statics defined in the same CGU, but did not for `{:?}`",
-            def_id
-        );
-
-        let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all());
+        // FIXME: Once we stop removing globals in `codegen_static`, we can uncomment this code.
+        // let defined_in_current_codegen_unit =
+        //     self.codegen_unit.items().contains_key(&MonoItem::Static(def_id));
+        // assert!(
+        //     !defined_in_current_codegen_unit,
+        //     "consts::get_static() should always hit the cache for \
+        //          statics defined in the same CGU, but did not for `{:?}`",
+        //     def_id
+        // );
        let sym = self.tcx.symbol_name(instance).name;
+        let fn_attrs = self.tcx.codegen_fn_attrs(def_id);

        let global = if def_id.is_local() && !self.tcx.is_foreign_item(def_id) {
-            let llty = self.layout_of(ty).gcc_type(self);
            if let Some(global) = self.get_declared_value(sym) {
-                if self.val_ty(global) != self.type_ptr_to(llty) {
+                if self.val_ty(global) != self.type_ptr_to(gcc_type) {
                    span_bug!(self.tcx.def_span(def_id), "Conflicting types for static");
                }
            }

            let is_tls = fn_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL);
            let global = self.declare_global(
-                &sym,
-                llty,
+                sym,
+                gcc_type,
                GlobalKind::Exported,
                is_tls,
                fn_attrs.link_section,
@ -246,7 +262,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {

            global
        } else {
-            check_and_apply_linkage(&self, &fn_attrs, ty, sym)
+            check_and_apply_linkage(self, fn_attrs, gcc_type, sym)
        };

        if !def_id.is_local() {
@ -360,18 +376,14 @@ fn codegen_static_initializer<'gcc, 'tcx>(
 fn check_and_apply_linkage<'gcc, 'tcx>(
    cx: &CodegenCx<'gcc, 'tcx>,
    attrs: &CodegenFnAttrs,
-    ty: Ty<'tcx>,
+    gcc_type: Type<'gcc>,
    sym: &str,
 ) -> LValue<'gcc> {
    let is_tls = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL);
-    let gcc_type = cx.layout_of(ty).gcc_type(cx);
    if let Some(linkage) = attrs.import_linkage {
        // Declare a symbol `foo` with the desired linkage.
-        let global1 = cx.declare_global_with_linkage(
-            &sym,
-            cx.type_i8(),
-            base::global_linkage_to_gcc(linkage),
-        );
+        let global1 =
+            cx.declare_global_with_linkage(sym, cx.type_i8(), base::global_linkage_to_gcc(linkage));

        // Declare an internal global `extern_with_linkage_foo` which
        // is initialized with the address of `foo`.  If `foo` is
@ -380,7 +392,7 @@ fn check_and_apply_linkage<'gcc, 'tcx>(
        // `extern_with_linkage_foo` will instead be initialized to
        // zero.
        let mut real_name = "_rust_extern_with_linkage_".to_string();
-        real_name.push_str(&sym);
+        real_name.push_str(sym);
        let global2 = cx.define_global(&real_name, gcc_type, is_tls, attrs.link_section);
        // TODO(antoyo): set linkage.
        let value = cx.const_ptrcast(global1.get_address(None), gcc_type);
@ -397,6 +409,6 @@ fn check_and_apply_linkage<'gcc, 'tcx>(
        // don't do this then linker errors can be generated where the linker
        // complains that one object file has a thread local version of the
        // symbol and another one doesn't.
-        cx.declare_global(&sym, gcc_type, GlobalKind::Imported, is_tls, attrs.link_section)
+        cx.declare_global(sym, gcc_type, GlobalKind::Imported, is_tls, attrs.link_section)
    }
 }
--- a/compiler/rustc_codegen_gcc/src/context.rs
+++ b/compiler/rustc_codegen_gcc/src/context.rs
@ -68,6 +68,10 @@ pub struct CodegenCx<'gcc, 'tcx> {
    pub sizet_type: Type<'gcc>,

    pub supports_128bit_integers: bool,
+    pub supports_f16_type: bool,
+    pub supports_f32_type: bool,
+    pub supports_f64_type: bool,
+    pub supports_f128_type: bool,

    pub float_type: Type<'gcc>,
    pub double_type: Type<'gcc>,
@ -110,7 +114,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
    local_gen_sym_counter: Cell<usize>,

    eh_personality: Cell<Option<RValue<'gcc>>>,
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
    pub rust_try_fn: Cell<Option<(Type<'gcc>, Function<'gcc>)>>,

    pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>,
@ -122,16 +126,21 @@ pub struct CodegenCx<'gcc, 'tcx> {
    /// FIXME(antoyo): fix the rustc API to avoid having this hack.
    pub structs_as_pointer: RefCell<FxHashSet<RValue<'gcc>>>,

-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
    pub cleanup_blocks: RefCell<FxHashSet<Block<'gcc>>>,
 }

 impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
+    #[allow(clippy::too_many_arguments)]
    pub fn new(
        context: &'gcc Context<'gcc>,
        codegen_unit: &'tcx CodegenUnit<'tcx>,
        tcx: TyCtxt<'tcx>,
        supports_128bit_integers: bool,
+        supports_f16_type: bool,
+        supports_f32_type: bool,
+        supports_f64_type: bool,
+        supports_f128_type: bool,
    ) -> Self {
        let create_type = |ctype, rust_type| {
            let layout = tcx.layout_of(ParamEnv::reveal_all().and(rust_type)).unwrap();
@ -304,6 +313,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
            sizet_type,

            supports_128bit_integers,
+            supports_f16_type,
+            supports_f32_type,
+            supports_f64_type,
+            supports_f128_type,

            float_type,
            double_type,
@ -324,11 +337,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
            struct_types: Default::default(),
            local_gen_sym_counter: Cell::new(0),
            eh_personality: Cell::new(None),
-            #[cfg(feature="master")]
+            #[cfg(feature = "master")]
            rust_try_fn: Cell::new(None),
            pointee_infos: Default::default(),
            structs_as_pointer: Default::default(),
-            #[cfg(feature="master")]
+            #[cfg(feature = "master")]
            cleanup_blocks: Default::default(),
        };
        // TODO(antoyo): instead of doing this, add SsizeT to libgccjit.
@ -385,7 +398,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
    }

    pub fn sess(&self) -> &'tcx Session {
-        &self.tcx.sess
+        self.tcx.sess
    }

    pub fn bitcast_if_needed(
@ -432,7 +445,9 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
        let func_name = self.tcx.symbol_name(instance).name;

        let func = if self.intrinsics.borrow().contains_key(func_name) {
-            self.intrinsics.borrow()[func_name].clone()
+            self.intrinsics.borrow()[func_name]
+        } else if let Some(variable) = self.get_declared_value(func_name) {
+            return variable;
        } else {
            get_fn(self, instance)
        };
@ -485,7 +500,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
                let symbol_name = tcx.symbol_name(instance).name;
                let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty());
                self.linkage.set(FunctionType::Extern);
-                let func = self.declare_fn(symbol_name, &fn_abi);
+                let func = self.declare_fn(symbol_name, fn_abi);
                let func: RValue<'gcc> = unsafe { std::mem::transmute(func) };
                func
            }
@ -496,7 +511,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
                    "rust_eh_personality"
                };
                let func = self.declare_func(name, self.type_i32(), &[], true);
-                unsafe { std::mem::transmute(func) }
+                unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) }
            }
        };
        // TODO(antoyo): apply target cpu attributes.
@ -505,7 +520,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
    }

    fn sess(&self) -> &Session {
-        &self.tcx.sess
+        self.tcx.sess
    }

    fn codegen_unit(&self) -> &'tcx CodegenUnit<'tcx> {
@ -522,7 +537,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {

    fn declare_c_main(&self, fn_type: Self::Type) -> Option<Self::Function> {
        let entry_name = self.sess().target.entry_name.as_ref();
-        if self.get_declared_value(entry_name).is_none() {
+        if !self.functions.borrow().contains_key(entry_name) {
            Some(self.declare_entry_fn(entry_name, fn_type, ()))
        } else {
            // If the symbol already exists, it is an error: for example, the user wrote
@ -614,7 +629,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
        // user defined names
        let mut name = String::with_capacity(prefix.len() + 6);
        name.push_str(prefix);
-        name.push_str(".");
+        name.push('.');
        name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
        name
    }
--- a/compiler/rustc_codegen_gcc/src/debuginfo.rs
+++ b/compiler/rustc_codegen_gcc/src/debuginfo.rs
@ -90,7 +90,7 @@ fn compute_mir_scopes<'gcc, 'tcx>(
 /// FIXME(tempdragon/?): Add Scope Support Here.
 fn make_mir_scope<'gcc, 'tcx>(
    cx: &CodegenCx<'gcc, 'tcx>,
-    instance: Instance<'tcx>,
+    _instance: Instance<'tcx>,
    mir: &Body<'tcx>,
    variables: &Option<BitSet<SourceScope>>,
    debug_context: &mut FunctionDebugContext<'tcx, (), Location<'gcc>>,
@ -103,7 +103,7 @@ fn make_mir_scope<'gcc, 'tcx>(

    let scope_data = &mir.source_scopes[scope];
    let parent_scope = if let Some(parent) = scope_data.parent_scope {
-        make_mir_scope(cx, instance, mir, variables, debug_context, instantiated, parent);
+        make_mir_scope(cx, _instance, mir, variables, debug_context, instantiated, parent);
        debug_context.scopes[parent]
    } else {
        // The root is the function itself.
@ -117,7 +117,7 @@ fn make_mir_scope<'gcc, 'tcx>(
        return;
    };

-    if let Some(vars) = variables {
+    if let Some(ref vars) = *variables {
        if !vars.contains(scope) && scope_data.inlined.is_none() {
            // Do not create a DIScope if there are no variables defined in this
            // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat.
@ -135,8 +135,14 @@ fn make_mir_scope<'gcc, 'tcx>(
    let inlined_at = scope_data.inlined.map(|(_, callsite_span)| {
        // FIXME(eddyb) this doesn't account for the macro-related
        // `Span` fixups that `rustc_codegen_ssa::mir::debuginfo` does.
-        let callsite_scope = parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
-        cx.dbg_loc(callsite_scope, parent_scope.inlined_at, callsite_span)
+
+        // TODO(tempdragon): Add scope support and then revert to cg_llvm version of this closure
+        // NOTE: `()` is passed in place of these variables here.
+        // Changed to comply with clippy.
+
+        /* let callsite_scope =  */
+        parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
+        cx.dbg_loc(/* callsite_scope */ (), parent_scope.inlined_at, callsite_span)
    });
    let p_inlined_at = parent_scope.inlined_at;
    // TODO(tempdragon): dbg_scope: Add support for scope extension here.
@ -224,7 +230,7 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
            file_end_pos: BytePos(0),
        };
        let mut fn_debug_context = FunctionDebugContext {
-            scopes: IndexVec::from_elem(empty_scope, &mir.source_scopes.as_slice()),
+            scopes: IndexVec::from_elem(empty_scope, mir.source_scopes.as_slice()),
            inlined_function_scopes: Default::default(),
        };

@ -273,16 +279,19 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
    ) -> Self::DILocation {
        let pos = span.lo();
        let DebugLoc { file, line, col } = self.lookup_debug_loc(pos);
-        let loc = match &file.name {
-            rustc_span::FileName::Real(name) => match name {
-                rustc_span::RealFileName::LocalPath(name) => {
+        let loc = match file.name {
+            rustc_span::FileName::Real(ref name) => match *name {
+                rustc_span::RealFileName::LocalPath(ref name) => {
                    if let Some(name) = name.to_str() {
                        self.context.new_location(name, line as i32, col as i32)
                    } else {
                        Location::null()
                    }
                }
-                rustc_span::RealFileName::Remapped { local_path, virtual_name: _ } => {
+                rustc_span::RealFileName::Remapped {
+                    ref local_path,
+                    virtual_name: ref _unused,
+                } => {
                    if let Some(name) = local_path.as_ref() {
                        if let Some(name) = name.to_str() {
                            self.context.new_location(name, line as i32, col as i32)
--- a/compiler/rustc_codegen_gcc/src/declare.rs
+++ b/compiler/rustc_codegen_gcc/src/declare.rs
@ -35,7 +35,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {

    pub fn declare_unnamed_global(&self, ty: Type<'gcc>) -> LValue<'gcc> {
        let name = self.generate_local_symbol_name("global");
-        self.context.new_global(None, GlobalKind::Internal, ty, &name)
+        self.context.new_global(None, GlobalKind::Internal, ty, name)
    }

    pub fn declare_global_with_linkage(
@ -176,16 +176,14 @@ fn declare_raw_fn<'gcc>(
        cx.functions.borrow()[name]
    } else {
        let params: Vec<_> = param_types
-            .into_iter()
+            .iter()
            .enumerate()
-            .map(|(index, param)| {
-                cx.context.new_parameter(None, *param, &format!("param{}", index))
-            }) // TODO(antoyo): set name.
+            .map(|(index, param)| cx.context.new_parameter(None, *param, format!("param{}", index))) // TODO(antoyo): set name.
            .collect();
        #[cfg(not(feature = "master"))]
-        let name = mangle_name(name);
+        let name = &mangle_name(name);
        let func =
-            cx.context.new_function(None, cx.linkage.get(), return_type, &params, &name, variadic);
+            cx.context.new_function(None, cx.linkage.get(), return_type, &params, name, variadic);
        cx.functions.borrow_mut().insert(name.to_string(), func);

        #[cfg(feature = "master")]
@ -200,10 +198,10 @@ fn declare_raw_fn<'gcc>(
            // create a wrapper function that calls rust_eh_personality.

            let params: Vec<_> = param_types
-                .into_iter()
+                .iter()
                .enumerate()
                .map(|(index, param)| {
-                    cx.context.new_parameter(None, *param, &format!("param{}", index))
+                    cx.context.new_parameter(None, *param, format!("param{}", index))
                }) // TODO(antoyo): set name.
                .collect();
            let gcc_func = cx.context.new_function(
--- a/compiler/rustc_codegen_gcc/src/int.rs
+++ b/compiler/rustc_codegen_gcc/src/int.rs
@ -2,8 +2,6 @@
 //! This module exists because some integer types are not supported on some gcc platforms, e.g.
 //! 128-bit integers on 32-bit platforms, and thus need to be handled manually.

-use std::convert::TryFrom;
-
 use gccjit::{BinaryOp, ComparisonOp, FunctionType, Location, RValue, ToRValue, Type, UnaryOp};
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
 use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, BuilderMethods, OverflowOp};
@ -40,7 +38,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            self.cx.context.new_unary_op(self.location, operation, typ, a)
        } else {
            let element_type = typ.dyncast_array().expect("element type");
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                typ,
                self.cx.context.new_unary_op(
                    self.location,
@ -83,7 +81,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                let b = self.context.new_cast(self.location, b, a_type);
                a >> b
            } else {
-                a >> b
+                let a_size = a_type.get_size();
+                let b_size = b_type.get_size();
+                match a_size.cmp(&b_size) {
+                    std::cmp::Ordering::Less => {
+                        let a = self.context.new_cast(self.location, a, b_type);
+                        a >> b
+                    }
+                    std::cmp::Ordering::Equal => a >> b,
+                    std::cmp::Ordering::Greater => {
+                        let b = self.context.new_cast(self.location, b, a_type);
+                        a >> b
+                    }
+                }
            }
        } else if a_type.is_vector() && a_type.is_vector() {
            a >> b
@ -114,7 +124,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            let shift_value = self.gcc_sub(b, sixty_four);
            let high = self.high(a);
            let sign = if a_type.is_signed(self) { high >> sixty_three } else { zero };
-            let array_value = self.from_low_high_rvalues(a_type, high >> shift_value, sign);
+            let array_value = self.concat_low_high_rvalues(a_type, high >> shift_value, sign);
            then_block.add_assignment(self.location, result, array_value);
            then_block.end_with_jump(self.location, after_block);

@ -126,12 +136,15 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

            let shift_value = self.gcc_sub(sixty_four, b);
            // NOTE: cast low to its unsigned type in order to perform a logical right shift.
-            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+            let unsigned_type = native_int_type.to_unsigned(self.cx);
            let casted_low = self.context.new_cast(self.location, self.low(a), unsigned_type);
            let shifted_low = casted_low >> self.context.new_cast(self.location, b, unsigned_type);
            let shifted_low = self.context.new_cast(self.location, shifted_low, native_int_type);
-            let array_value =
-                self.from_low_high_rvalues(a_type, (high << shift_value) | shifted_low, high >> b);
+            let array_value = self.concat_low_high_rvalues(
+                a_type,
+                (high << shift_value) | shifted_low,
+                high >> b,
+            );
            actual_else_block.add_assignment(self.location, result, array_value);
            actual_else_block.end_with_jump(self.location, after_block);

@ -255,10 +268,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
    ) -> (<Self as BackendTypes>::Value, <Self as BackendTypes>::Value) {
        use rustc_middle::ty::{Int, IntTy::*, Uint, UintTy::*};

-        let new_kind = match typ.kind() {
+        let new_kind = match *typ.kind() {
            Int(t @ Isize) => Int(t.normalize(self.tcx.sess.target.pointer_width)),
            Uint(t @ Usize) => Uint(t.normalize(self.tcx.sess.target.pointer_width)),
-            t @ (Uint(_) | Int(_)) => t.clone(),
+            t @ (Uint(_) | Int(_)) => t,
            _ => panic!("tried to get overflow intrinsic for op applied to non-int type"),
        };

@ -344,7 +357,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            }
        };

-        let intrinsic = self.context.get_builtin_function(&name);
+        let intrinsic = self.context.get_builtin_function(name);
        let res = self
            .current_func()
            // TODO(antoyo): is it correct to use rhs type instead of the parameter typ?
@ -454,7 +467,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            let native_int_type = a_type.dyncast_array().expect("get element type");
            // NOTE: cast low to its unsigned type in order to perform a comparison correctly (e.g.
            // the sign is only on high).
-            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+            let unsigned_type = native_int_type.to_unsigned(self.cx);

            let lhs_low = self.context.new_cast(self.location, self.low(lhs), unsigned_type);
            let rhs_low = self.context.new_cast(self.location, self.low(rhs), unsigned_type);
@ -589,7 +602,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                | IntPredicate::IntULT
                | IntPredicate::IntULE => {
                    if !a_type.is_vector() {
-                        let unsigned_type = a_type.to_unsigned(&self.cx);
+                        let unsigned_type = a_type.to_unsigned(self.cx);
                        lhs = self.context.new_cast(self.location, lhs, unsigned_type);
                        rhs = self.context.new_cast(self.location, rhs, unsigned_type);
                    }
@ -612,7 +625,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        {
            a ^ b
        } else {
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                a_type,
                self.low(a) ^ self.low(b),
                self.high(a) ^ self.high(b),
@ -635,7 +648,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                let b = self.context.new_cast(self.location, b, a_type);
                a << b
            } else {
-                a << b
+                let a_size = a_type.get_size();
+                let b_size = b_type.get_size();
+                match a_size.cmp(&b_size) {
+                    std::cmp::Ordering::Less => {
+                        let a = self.context.new_cast(self.location, a, b_type);
+                        a << b
+                    }
+                    std::cmp::Ordering::Equal => a << b,
+                    std::cmp::Ordering::Greater => {
+                        let b = self.context.new_cast(self.location, b, a_type);
+                        a << b
+                    }
+                }
            }
        } else if a_type.is_vector() && a_type.is_vector() {
            a << b
@ -661,7 +686,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
            self.llbb().end_with_conditional(self.location, condition, then_block, else_block);

            let array_value =
-                self.from_low_high_rvalues(a_type, zero, self.low(a) << (b - sixty_four));
+                self.concat_low_high_rvalues(a_type, zero, self.low(a) << (b - sixty_four));
            then_block.add_assignment(self.location, result, array_value);
            then_block.end_with_jump(self.location, after_block);

@ -673,13 +698,13 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

            // NOTE: cast low to its unsigned type in order to perform a logical right shift.
            // TODO(antoyo): adjust this ^ comment.
-            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+            let unsigned_type = native_int_type.to_unsigned(self.cx);
            let casted_low = self.context.new_cast(self.location, self.low(a), unsigned_type);
            let shift_value = self.context.new_cast(self.location, sixty_four - b, unsigned_type);
            let high_low =
                self.context.new_cast(self.location, casted_low >> shift_value, native_int_type);

-            let array_value = self.from_low_high_rvalues(
+            let array_value = self.concat_low_high_rvalues(
                a_type,
                self.low(a) << b,
                (self.high(a) << b) | high_low,
@ -708,7 +733,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

            // NOTE: we also need to swap the two elements here, in addition to swapping inside
            // the elements themselves like done above.
-            return self.from_low_high_rvalues(arg_type, swapped_msb, swapped_lsb);
+            return self.concat_low_high_rvalues(arg_type, swapped_msb, swapped_lsb);
        }

        // TODO(antoyo): check if it's faster to use string literals and a
@ -727,10 +752,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
    pub fn gcc_int(&self, typ: Type<'gcc>, int: i64) -> RValue<'gcc> {
        if self.is_native_int_type_or_bool(typ) {
-            self.context.new_rvalue_from_long(typ, i64::try_from(int).expect("i64::try_from"))
+            self.context.new_rvalue_from_long(typ, int)
        } else {
            // NOTE: set the sign in high.
-            self.from_low_high(typ, int, -(int.is_negative() as i64))
+            self.concat_low_high(typ, int, -(int.is_negative() as i64))
        }
    }

@ -740,10 +765,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
            let num = self.context.new_rvalue_from_long(self.u64_type, int as i64);
            self.gcc_int_cast(num, typ)
        } else if self.is_native_int_type_or_bool(typ) {
-            self.context
-                .new_rvalue_from_long(typ, u64::try_from(int).expect("u64::try_from") as i64)
+            self.context.new_rvalue_from_long(typ, int as i64)
        } else {
-            self.from_low_high(typ, int as i64, 0)
+            self.concat_low_high(typ, int as i64, 0)
        }
    }

@ -760,7 +784,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
                let shift = high << sixty_four;
                shift | self.context.new_cast(None, low, typ)
            } else {
-                self.from_low_high(typ, low as i64, high as i64)
+                self.concat_low_high(typ, low as i64, high as i64)
            }
        } else if typ.is_i128(self) {
            // FIXME(antoyo): libgccjit cannot create 128-bit values yet.
@ -775,7 +799,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
        if self.is_native_int_type_or_bool(typ) {
            self.context.new_rvalue_zero(typ)
        } else {
-            self.from_low_high(typ, 0, 0)
+            self.concat_low_high(typ, 0, 0)
        }
    }

@ -813,7 +837,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
                "both types should either be native or non-native for or operation"
            );
            let native_int_type = a_type.dyncast_array().expect("get element type");
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                a_type,
                self.context.new_binary_op(
                    loc,
@ -858,7 +882,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
            let is_negative =
                self.context.new_comparison(None, ComparisonOp::LessThan, value, zero);
            let is_negative = self.gcc_int_cast(is_negative, dest_element_type);
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                dest_typ,
                self.context.new_cast(None, value, dest_element_type),
                self.context.new_unary_op(None, UnaryOp::Minus, dest_element_type, is_negative),
@ -926,7 +950,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
            return self.context.new_cast(None, value, dest_typ);
        }

-        debug_assert!(value_type.dyncast_array().is_some());
+        debug_assert!(dest_typ.dyncast_array().is_some());
        let name_suffix = match self.type_kind(value_type) {
            TypeKind::Float => "sfti",
            TypeKind::Double => "dfti",
@ -978,7 +1002,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
            .to_rvalue()
    }

-    fn from_low_high_rvalues(
+    fn concat_low_high_rvalues(
        &self,
        typ: Type<'gcc>,
        low: RValue<'gcc>,
@ -993,7 +1017,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
        self.context.new_array_constructor(None, typ, &values)
    }

-    fn from_low_high(&self, typ: Type<'gcc>, low: i64, high: i64) -> RValue<'gcc> {
+    fn concat_low_high(&self, typ: Type<'gcc>, low: i64, high: i64) -> RValue<'gcc> {
        let (first, last) = match self.sess().target.options.endian {
            Endian::Little => (low, high),
            Endian::Big => (high, low),
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@ -74,6 +74,10 @@ match name {
    "llvm.amdgcn.cvt.sr.bf8.f32" => "__builtin_amdgcn_cvt_sr_bf8_f32",
    "llvm.amdgcn.cvt.sr.fp8.f32" => "__builtin_amdgcn_cvt_sr_fp8_f32",
    "llvm.amdgcn.dispatch.id" => "__builtin_amdgcn_dispatch_id",
+    "llvm.amdgcn.dot4.f32.bf8.bf8" => "__builtin_amdgcn_dot4_f32_bf8_bf8",
+    "llvm.amdgcn.dot4.f32.bf8.fp8" => "__builtin_amdgcn_dot4_f32_bf8_fp8",
+    "llvm.amdgcn.dot4.f32.fp8.bf8" => "__builtin_amdgcn_dot4_f32_fp8_bf8",
+    "llvm.amdgcn.dot4.f32.fp8.fp8" => "__builtin_amdgcn_dot4_f32_fp8_fp8",
    "llvm.amdgcn.ds.add.gs.reg.rtn" => "__builtin_amdgcn_ds_add_gs_reg_rtn",
    "llvm.amdgcn.ds.bpermute" => "__builtin_amdgcn_ds_bpermute",
    "llvm.amdgcn.ds.fadd.v2bf16" => "__builtin_amdgcn_ds_atomic_fadd_v2bf16",
@ -2291,6 +2295,10 @@ match name {
    "llvm.loongarch.csrxchg.d" => "__builtin_loongarch_csrxchg_d",
    "llvm.loongarch.csrxchg.w" => "__builtin_loongarch_csrxchg_w",
    "llvm.loongarch.dbar" => "__builtin_loongarch_dbar",
+    "llvm.loongarch.frecipe.d" => "__builtin_loongarch_frecipe_d",
+    "llvm.loongarch.frecipe.s" => "__builtin_loongarch_frecipe_s",
+    "llvm.loongarch.frsqrte.d" => "__builtin_loongarch_frsqrte_d",
+    "llvm.loongarch.frsqrte.s" => "__builtin_loongarch_frsqrte_s",
    "llvm.loongarch.ibar" => "__builtin_loongarch_ibar",
    "llvm.loongarch.iocsrrd.b" => "__builtin_loongarch_iocsrrd_b",
    "llvm.loongarch.iocsrrd.d" => "__builtin_loongarch_iocsrrd_d",
@ -2529,6 +2537,8 @@ match name {
    "llvm.loongarch.lasx.xvfnmsub.s" => "__builtin_lasx_xvfnmsub_s",
    "llvm.loongarch.lasx.xvfrecip.d" => "__builtin_lasx_xvfrecip_d",
    "llvm.loongarch.lasx.xvfrecip.s" => "__builtin_lasx_xvfrecip_s",
+    "llvm.loongarch.lasx.xvfrecipe.d" => "__builtin_lasx_xvfrecipe_d",
+    "llvm.loongarch.lasx.xvfrecipe.s" => "__builtin_lasx_xvfrecipe_s",
    "llvm.loongarch.lasx.xvfrint.d" => "__builtin_lasx_xvfrint_d",
    "llvm.loongarch.lasx.xvfrint.s" => "__builtin_lasx_xvfrint_s",
    "llvm.loongarch.lasx.xvfrintrm.d" => "__builtin_lasx_xvfrintrm_d",
@ -2541,6 +2551,8 @@ match name {
    "llvm.loongarch.lasx.xvfrintrz.s" => "__builtin_lasx_xvfrintrz_s",
    "llvm.loongarch.lasx.xvfrsqrt.d" => "__builtin_lasx_xvfrsqrt_d",
    "llvm.loongarch.lasx.xvfrsqrt.s" => "__builtin_lasx_xvfrsqrt_s",
+    "llvm.loongarch.lasx.xvfrsqrte.d" => "__builtin_lasx_xvfrsqrte_d",
+    "llvm.loongarch.lasx.xvfrsqrte.s" => "__builtin_lasx_xvfrsqrte_s",
    "llvm.loongarch.lasx.xvfrstp.b" => "__builtin_lasx_xvfrstp_b",
    "llvm.loongarch.lasx.xvfrstp.h" => "__builtin_lasx_xvfrstp_h",
    "llvm.loongarch.lasx.xvfrstpi.b" => "__builtin_lasx_xvfrstpi_b",
@ -3255,6 +3267,8 @@ match name {
    "llvm.loongarch.lsx.vfnmsub.s" => "__builtin_lsx_vfnmsub_s",
    "llvm.loongarch.lsx.vfrecip.d" => "__builtin_lsx_vfrecip_d",
    "llvm.loongarch.lsx.vfrecip.s" => "__builtin_lsx_vfrecip_s",
+    "llvm.loongarch.lsx.vfrecipe.d" => "__builtin_lsx_vfrecipe_d",
+    "llvm.loongarch.lsx.vfrecipe.s" => "__builtin_lsx_vfrecipe_s",
    "llvm.loongarch.lsx.vfrint.d" => "__builtin_lsx_vfrint_d",
    "llvm.loongarch.lsx.vfrint.s" => "__builtin_lsx_vfrint_s",
    "llvm.loongarch.lsx.vfrintrm.d" => "__builtin_lsx_vfrintrm_d",
@ -3267,6 +3281,8 @@ match name {
    "llvm.loongarch.lsx.vfrintrz.s" => "__builtin_lsx_vfrintrz_s",
    "llvm.loongarch.lsx.vfrsqrt.d" => "__builtin_lsx_vfrsqrt_d",
    "llvm.loongarch.lsx.vfrsqrt.s" => "__builtin_lsx_vfrsqrt_s",
+    "llvm.loongarch.lsx.vfrsqrte.d" => "__builtin_lsx_vfrsqrte_d",
+    "llvm.loongarch.lsx.vfrsqrte.s" => "__builtin_lsx_vfrsqrte_s",
    "llvm.loongarch.lsx.vfrstp.b" => "__builtin_lsx_vfrstp_b",
    "llvm.loongarch.lsx.vfrstp.h" => "__builtin_lsx_vfrstp_h",
    "llvm.loongarch.lsx.vfrstpi.b" => "__builtin_lsx_vfrstpi_b",
@ -4434,6 +4450,7 @@ match name {
    "llvm.nvvm.abs.bf16x2" => "__nvvm_abs_bf16x2",
    "llvm.nvvm.abs.i" => "__nvvm_abs_i",
    "llvm.nvvm.abs.ll" => "__nvvm_abs_ll",
+    "llvm.nvvm.activemask" => "__nvvm_activemask",
    "llvm.nvvm.add.rm.d" => "__nvvm_add_rm_d",
    "llvm.nvvm.add.rm.f" => "__nvvm_add_rm_f",
    "llvm.nvvm.add.rm.ftz.f" => "__nvvm_add_rm_ftz_f",
@ -4522,6 +4539,7 @@ match name {
    "llvm.nvvm.ex2.approx.d" => "__nvvm_ex2_approx_d",
    "llvm.nvvm.ex2.approx.f" => "__nvvm_ex2_approx_f",
    "llvm.nvvm.ex2.approx.ftz.f" => "__nvvm_ex2_approx_ftz_f",
+    "llvm.nvvm.exit" => "__nvvm_exit",
    "llvm.nvvm.f2bf16.rn" => "__nvvm_f2bf16_rn",
    "llvm.nvvm.f2bf16.rn.relu" => "__nvvm_f2bf16_rn_relu",
    "llvm.nvvm.f2bf16.rz" => "__nvvm_f2bf16_rz",
@ -4722,8 +4740,11 @@ match name {
    "llvm.nvvm.mul24.ui" => "__nvvm_mul24_ui",
    "llvm.nvvm.mulhi.i" => "__nvvm_mulhi_i",
    "llvm.nvvm.mulhi.ll" => "__nvvm_mulhi_ll",
+    "llvm.nvvm.mulhi.s" => "__nvvm_mulhi_s",
    "llvm.nvvm.mulhi.ui" => "__nvvm_mulhi_ui",
    "llvm.nvvm.mulhi.ull" => "__nvvm_mulhi_ull",
+    "llvm.nvvm.mulhi.us" => "__nvvm_mulhi_us",
+    "llvm.nvvm.nanosleep" => "__nvvm_nanosleep",
    "llvm.nvvm.neg.bf16" => "__nvvm_neg_bf16",
    "llvm.nvvm.neg.bf16x2" => "__nvvm_neg_bf16x2",
    "llvm.nvvm.popc.i" => "__nvvm_popc_i",
@ -4783,6 +4804,7 @@ match name {
    "llvm.nvvm.read.ptx.sreg.envreg7" => "__nvvm_read_ptx_sreg_envreg7",
    "llvm.nvvm.read.ptx.sreg.envreg8" => "__nvvm_read_ptx_sreg_envreg8",
    "llvm.nvvm.read.ptx.sreg.envreg9" => "__nvvm_read_ptx_sreg_envreg9",
+    "llvm.nvvm.read.ptx.sreg.globaltimer" => "__nvvm_read_ptx_sreg_globaltimer",
    "llvm.nvvm.read.ptx.sreg.gridid" => "__nvvm_read_ptx_sreg_gridid",
    // [DUPLICATE]: "llvm.nvvm.read.ptx.sreg.gridid" => "__nvvm_read_ptx_sreg_",
    "llvm.nvvm.read.ptx.sreg.laneid" => "__nvvm_read_ptx_sreg_laneid",
@ -4835,6 +4857,7 @@ match name {
    "llvm.nvvm.redux.sync.umax" => "__nvvm_redux_sync_umax",
    "llvm.nvvm.redux.sync.umin" => "__nvvm_redux_sync_umin",
    "llvm.nvvm.redux.sync.xor" => "__nvvm_redux_sync_xor",
+    "llvm.nvvm.reflect" => "__nvvm_reflect",
    "llvm.nvvm.rotate.b32" => "__nvvm_rotate_b32",
    "llvm.nvvm.rotate.b64" => "__nvvm_rotate_b64",
    "llvm.nvvm.rotate.right.b64" => "__nvvm_rotate_right_b64",
@ -4845,7 +4868,11 @@ match name {
    "llvm.nvvm.rsqrt.approx.f" => "__nvvm_rsqrt_approx_f",
    "llvm.nvvm.rsqrt.approx.ftz.f" => "__nvvm_rsqrt_approx_ftz_f",
    "llvm.nvvm.sad.i" => "__nvvm_sad_i",
+    "llvm.nvvm.sad.ll" => "__nvvm_sad_ll",
+    "llvm.nvvm.sad.s" => "__nvvm_sad_s",
    "llvm.nvvm.sad.ui" => "__nvvm_sad_ui",
+    "llvm.nvvm.sad.ull" => "__nvvm_sad_ull",
+    "llvm.nvvm.sad.us" => "__nvvm_sad_us",
    "llvm.nvvm.saturate.d" => "__nvvm_saturate_d",
    "llvm.nvvm.saturate.f" => "__nvvm_saturate_f",
    "llvm.nvvm.saturate.ftz.f" => "__nvvm_saturate_ftz_f",
@ -5471,6 +5498,7 @@ match name {
    "llvm.ppc.fctiwz" => "__builtin_ppc_fctiwz",
    "llvm.ppc.fctudz" => "__builtin_ppc_fctudz",
    "llvm.ppc.fctuwz" => "__builtin_ppc_fctuwz",
+    "llvm.ppc.fence" => "__builtin_ppc_fence",
    "llvm.ppc.fmaf128.round.to.odd" => "__builtin_fmaf128_round_to_odd",
    "llvm.ppc.fmsub" => "__builtin_ppc_fmsub",
    "llvm.ppc.fmsubs" => "__builtin_ppc_fmsubs",
@ -5599,6 +5627,9 @@ match name {
    "llvm.ppc.qpx.qvstfs" => "__builtin_qpx_qvstfs",
    "llvm.ppc.qpx.qvstfsa" => "__builtin_qpx_qvstfsa",
    "llvm.ppc.readflm" => "__builtin_readflm",
+    "llvm.ppc.rldimi" => "__builtin_ppc_rldimi",
+    "llvm.ppc.rlwimi" => "__builtin_ppc_rlwimi",
+    "llvm.ppc.rlwnm" => "__builtin_ppc_rlwnm",
    "llvm.ppc.scalar.extract.expq" => "__builtin_vsx_scalar_extract_expq",
    "llvm.ppc.scalar.insert.exp.qp" => "__builtin_vsx_scalar_insert_exp_qp",
    "llvm.ppc.set.texasr" => "__builtin_set_texasr",
@ -5912,6 +5943,8 @@ match name {
    "llvm.s390.vupllb" => "__builtin_s390_vupllb",
    "llvm.s390.vupllf" => "__builtin_s390_vupllf",
    "llvm.s390.vupllh" => "__builtin_s390_vupllh",
+    // spv
+    "llvm.spv.create.handle" => "__builtin_hlsl_create_handle",
    // ve
    "llvm.ve.vl.andm.MMM" => "__builtin_ve_vl_andm_MMM",
    "llvm.ve.vl.andm.mmm" => "__builtin_ve_vl_andm_mmm",
--- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
@ -15,7 +15,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
    // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
    // arguments here.
    if gcc_func.get_param_count() != args.len() {
-        match &*func_name {
+        match func_name {
            // NOTE: the following intrinsics have a different number of parameters in LLVM and GCC.
            "__builtin_ia32_prold512_mask"
            | "__builtin_ia32_pmuldq512_mask"
@ -380,7 +380,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
            _ => (),
        }
    } else {
-        match &*func_name {
+        match func_name {
            "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
                let new_args = args.to_vec();
                let arg3_type = gcc_func.get_param_type(2);
@ -629,17 +629,22 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function

 #[cfg(feature = "master")]
 pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
-    match name {
+    let gcc_name = match name {
        "llvm.prefetch" => {
            let gcc_name = "__builtin_prefetch";
            let func = cx.context.get_builtin_function(gcc_name);
            cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
            return func;
        }
-        _ => (),
-    }

-    let gcc_name = match name {
+        "llvm.aarch64.isb" => {
+            // FIXME: GCC doesn't support __builtin_arm_isb yet, check if this builtin is OK.
+            let gcc_name = "__atomic_thread_fence";
+            let func = cx.context.get_builtin_function(gcc_name);
+            cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
+            return func;
+        }
+
        "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
        // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
        "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@ -91,7 +91,7 @@ fn get_simple_intrinsic<'gcc, 'tcx>(
        sym::abort => "abort",
        _ => return None,
    };
-    Some(cx.context.get_builtin_function(&gcc_name))
+    Some(cx.context.get_builtin_function(gcc_name))
 }

 impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
@ -122,10 +122,17 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
        let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);

        let simple = get_simple_intrinsic(self, name);
+
+        // FIXME(tempdragon): Re-enable `clippy::suspicious_else_formatting` if the following issue is solved:
+        // https://github.com/rust-lang/rust-clippy/issues/12497
+        // and leave `else if use_integer_compare` to be placed "as is".
+        #[allow(clippy::suspicious_else_formatting)]
        let llval = match name {
            _ if simple.is_some() => {
                // FIXME(antoyo): remove this cast when the API supports function.
-                let func = unsafe { std::mem::transmute(simple.expect("simple")) };
+                let func = unsafe {
+                    std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(simple.expect("simple"))
+                };
                self.call(
                    self.type_void(),
                    None,
@ -167,7 +174,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
            sym::volatile_load | sym::unaligned_volatile_load => {
                let tp_ty = fn_args.type_at(0);
                let ptr = args[0].immediate();
-                let load = if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode {
+                let load = if let PassMode::Cast { cast: ref ty, pad_i32: _ } = fn_abi.ret.mode {
                    let gcc_ty = ty.gcc_type(self);
                    self.volatile_load(gcc_ty, ptr)
                } else {
@ -213,12 +220,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                            let after_block = func.new_block("after");

                            let arg = args[0].immediate();
-                            let result = func.new_local(None, arg.get_type(), "zeros");
+                            let result = func.new_local(None, self.u32_type, "zeros");
                            let zero = self.cx.gcc_zero(arg.get_type());
                            let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
                            self.llbb().end_with_conditional(None, cond, then_block, else_block);

-                            let zero_result = self.cx.gcc_uint(arg.get_type(), width);
+                            let zero_result = self.cx.gcc_uint(self.u32_type, width);
                            then_block.add_assignment(None, result, zero_result);
                            then_block.end_with_jump(None, after_block);

@ -386,7 +393,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
        };

        if !fn_abi.ret.is_ignore() {
-            if let PassMode::Cast { cast: ty, .. } = &fn_abi.ret.mode {
+            if let PassMode::Cast { cast: ref ty, .. } = fn_abi.ret.mode {
                let ptr_llty = self.type_ptr_to(ty.gcc_type(self));
                let ptr = self.pointercast(result.val.llval, ptr_llty);
                self.store(llval, ptr, result.val.align);
@ -592,7 +599,7 @@ fn int_type_width_signed<'gcc, 'tcx>(
    ty: Ty<'tcx>,
    cx: &CodegenCx<'gcc, 'tcx>,
 ) -> Option<(u64, bool)> {
-    match ty.kind() {
+    match *ty.kind() {
        ty::Int(t) => Some((
            match t {
                rustc_middle::ty::IntTy::Isize => u64::from(cx.tcx.sess.target.pointer_width),
@ -698,16 +705,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
    fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        // TODO(antoyo): use width?
        let arg_type = arg.get_type();
+        let result_type = self.u32_type;
        let count_leading_zeroes =
            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
            // instead of using is_uint().
-            if arg_type.is_uint(&self.cx) {
+            if arg_type.is_uint(self.cx) {
                "__builtin_clz"
            }
-            else if arg_type.is_ulong(&self.cx) {
+            else if arg_type.is_ulong(self.cx) {
                "__builtin_clzl"
            }
-            else if arg_type.is_ulonglong(&self.cx) {
+            else if arg_type.is_ulonglong(self.cx) {
                "__builtin_clzll"
            }
            else if width == 128 {
@ -755,7 +763,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

                let res = self.context.new_array_access(self.location, result, index);

-                return self.gcc_int_cast(res.to_rvalue(), arg_type);
+                return self.gcc_int_cast(res.to_rvalue(), result_type);
            }
            else {
                let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
@ -763,17 +771,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
                let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
                let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff;
-                return self.context.new_cast(self.location, res, arg_type);
+                return self.context.new_cast(self.location, res, result_type);
            };
        let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
        let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]);
-        self.context.new_cast(self.location, res, arg_type)
+        self.context.new_cast(self.location, res, result_type)
    }

    fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
-        let result_type = arg.get_type();
-        let arg = if result_type.is_signed(self.cx) {
-            let new_type = result_type.to_unsigned(self.cx);
+        let arg_type = arg.get_type();
+        let result_type = self.u32_type;
+        let arg = if arg_type.is_signed(self.cx) {
+            let new_type = arg_type.to_unsigned(self.cx);
            self.gcc_int_cast(arg, new_type)
        } else {
            arg
@ -782,17 +791,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
        let (count_trailing_zeroes, expected_type) =
            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
            // instead of using is_uint().
-            if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) {
+            if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) {
                // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero.
                ("__builtin_ctz", self.cx.uint_type)
            }
-            else if arg_type.is_ulong(&self.cx) {
+            else if arg_type.is_ulong(self.cx) {
                ("__builtin_ctzl", self.cx.ulong_type)
            }
-            else if arg_type.is_ulonglong(&self.cx) {
+            else if arg_type.is_ulonglong(self.cx) {
                ("__builtin_ctzll", self.cx.ulonglong_type)
            }
-            else if arg_type.is_u128(&self.cx) {
+            else if arg_type.is_u128(self.cx) {
                // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
                let array_type = self.context.new_array_type(None, arg_type, 3);
                let result = self.current_func()
@ -863,18 +872,16 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

    fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> {
        // TODO(antoyo): use the optimized version with fewer operations.
-        let result_type = value.get_type();
-        let value_type = result_type.to_unsigned(self.cx);
+        let result_type = self.u32_type;
+        let arg_type = value.get_type();
+        let value_type = arg_type.to_unsigned(self.cx);

-        let value = if result_type.is_signed(self.cx) {
-            self.gcc_int_cast(value, value_type)
-        } else {
-            value
-        };
+        let value =
+            if arg_type.is_signed(self.cx) { self.gcc_int_cast(value, value_type) } else { value };

        // only break apart 128-bit ints if they're not natively supported
        // TODO(antoyo): remove this if/when native 128-bit integers land in libgccjit
-        if value_type.is_u128(&self.cx) && !self.cx.supports_128bit_integers {
+        if value_type.is_u128(self.cx) && !self.cx.supports_128bit_integers {
            let sixty_four = self.gcc_int(value_type, 64);
            let right_shift = self.gcc_lshr(value, sixty_four);
            let high = self.gcc_int_cast(right_shift, self.cx.ulonglong_type);
@ -997,7 +1004,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

            // Return `result_type`'s maximum or minimum value on overflow
            // NOTE: convert the type to unsigned to have an unsigned shift.
-            let unsigned_type = result_type.to_unsigned(&self.cx);
+            let unsigned_type = result_type.to_unsigned(self.cx);
            let shifted = self.gcc_lshr(
                self.gcc_int_cast(lhs, unsigned_type),
                self.gcc_int(unsigned_type, width as i64 - 1),
@ -1189,7 +1196,7 @@ fn codegen_gnu_try<'gcc>(
        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);
    });

-    let func = unsafe { std::mem::transmute(func) };
+    let func = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) };

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
@ -1263,7 +1270,7 @@ fn gen_fn<'a, 'gcc, 'tcx>(
    // FIXME(eddyb) find a nicer way to do this.
    cx.linkage.set(FunctionType::Internal);
    let func = cx.declare_fn(name, fn_abi);
-    let func_val = unsafe { std::mem::transmute(func) };
+    let func_val = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) };
    cx.set_frame_pointer_type(func_val);
    cx.apply_target_cpu_attr(func_val);
    let block = Builder::append_block(cx, func_val, "entry-block");
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@ -13,6 +13,7 @@ use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::OperandRef;
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
+#[cfg(feature = "master")]
 use rustc_hir as hir;
 use rustc_middle::mir::BinOp;
 use rustc_middle::span_bug;
@ -72,11 +73,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
        let expected_bytes = len / 8 + ((len % 8 > 0) as u64);

        let mask_ty = arg_tys[0];
-        let mut mask = match mask_ty.kind() {
+        let mut mask = match *mask_ty.kind() {
            ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
            ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
            ty::Array(elem, len)
-                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                if matches!(*elem.kind(), ty::Uint(ty::UintTy::U8))
                    && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
                        == Some(expected_bytes) =>
            {
@ -309,10 +310,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                })
                .collect();
            return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems));
-        } else {
-            // avoid the unnecessary truncation as an optimization.
-            return Ok(bx.context.new_bitcast(None, result, v_type));
        }
+        // avoid the unnecessary truncation as an optimization.
+        return Ok(bx.context.new_bitcast(None, result, v_type));
    }
    // since gcc doesn't have vector shuffle methods available in non-patched builds, fall back to
    // component-wise bitreverses if they're not available.
@ -342,11 +342,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
            .map(|i| {
                let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
                let value = bx.extract_element(vector, index).to_rvalue();
-                if name == sym::simd_ctlz {
-                    bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value)
+                let value_type = value.get_type();
+                let element = if name == sym::simd_ctlz {
+                    bx.count_leading_zeroes(value_type.get_size() as u64 * 8, value)
                } else {
-                    bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value)
-                }
+                    bx.count_trailing_zeroes(value_type.get_size() as u64 * 8, value)
+                };
+                bx.context.new_cast(None, element, value_type)
            })
            .collect();
        return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements));
@ -355,8 +357,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
    if name == sym::simd_shuffle {
        // Make sure this is actually an array, since typeck only checks the length-suffixed
        // version of this intrinsic.
-        let n: u64 = match args[2].layout.ty.kind() {
-            ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
+        let n: u64 = match *args[2].layout.ty.kind() {
+            ty::Array(ty, len) if matches!(*ty.kind(), ty::Uint(ty::UintTy::U32)) => {
                len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
                    || span_bug!(span, "could not evaluate shuffle index array length"),
                )
@ -429,13 +431,148 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
            m_len == v_len,
            InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
        );
-        match m_elem_ty.kind() {
+        match *m_elem_ty.kind() {
            ty::Int(_) => {}
            _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }),
        }
        return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
    }

+    if name == sym::simd_cast_ptr {
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
+        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
+
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
+
+        match *in_elem.kind() {
+            ty::RawPtr(p_ty, _) => {
+                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
+                    bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty)
+                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastFatPointer { span, name, ty: in_elem }
+                );
+            }
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
+            }
+        }
+        match *out_elem.kind() {
+            ty::RawPtr(p_ty, _) => {
+                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
+                    bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty)
+                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastFatPointer { span, name, ty: out_elem }
+                );
+            }
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
+            }
+        }
+
+        let arg = args[0].immediate();
+        let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type();
+        let values: Vec<_> = (0..in_len)
+            .map(|i| {
+                let idx = bx.gcc_int(bx.usize_type, i as _);
+                let value = bx.extract_element(arg, idx);
+                bx.pointercast(value, elem_type)
+            })
+            .collect();
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values));
+    }
+
+    if name == sym::simd_expose_provenance {
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
+        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
+
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
+
+        match *in_elem.kind() {
+            ty::RawPtr(_, _) => {}
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
+            }
+        }
+        match *out_elem.kind() {
+            ty::Uint(ty::UintTy::Usize) => {}
+            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: out_elem }),
+        }
+
+        let arg = args[0].immediate();
+        let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type();
+        let values: Vec<_> = (0..in_len)
+            .map(|i| {
+                let idx = bx.gcc_int(bx.usize_type, i as _);
+                let value = bx.extract_element(arg, idx);
+                bx.ptrtoint(value, elem_type)
+            })
+            .collect();
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values));
+    }
+
+    if name == sym::simd_with_exposed_provenance {
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
+        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
+
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
+
+        match *in_elem.kind() {
+            ty::Uint(ty::UintTy::Usize) => {}
+            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: in_elem }),
+        }
+        match *out_elem.kind() {
+            ty::RawPtr(_, _) => {}
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
+            }
+        }
+
+        let arg = args[0].immediate();
+        let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type();
+        let values: Vec<_> = (0..in_len)
+            .map(|i| {
+                let idx = bx.gcc_int(bx.usize_type, i as _);
+                let value = bx.extract_element(arg, idx);
+                bx.inttoptr(value, elem_type)
+            })
+            .collect();
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values));
+    }
+
    #[cfg(feature = "master")]
    if name == sym::simd_cast || name == sym::simd_as {
        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
@ -462,13 +599,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
            Unsupported,
        }

-        let in_style = match in_elem.kind() {
+        let in_style = match *in_elem.kind() {
            ty::Int(_) | ty::Uint(_) => Style::Int,
            ty::Float(_) => Style::Float,
            _ => Style::Unsupported,
        };

-        let out_style = match out_elem.kind() {
+        let out_style = match *out_elem.kind() {
            ty::Int(_) | ty::Uint(_) => Style::Int,
            ty::Float(_) => Style::Float,
            _ => Style::Unsupported,
@ -495,7 +632,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
    macro_rules! arith_binary {
        ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
            $(if name == sym::$name {
-                match in_elem.kind() {
+                match *in_elem.kind() {
                    $($(ty::$p(_))|* => {
                        return Ok(bx.$call(args[0].immediate(), args[1].immediate()))
                    })*
@ -533,7 +670,6 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
        let sign_shift = bx.context.new_rvalue_from_int(elem_type, elem_size as i32 - 1);
        let one = bx.context.new_rvalue_one(elem_type);

-        let mut shift = 0;
        for i in 0..in_len {
            let elem =
                bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
@ -541,17 +677,16 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
            let masked = shifted & one;
            result = result
                | (bx.context.new_cast(None, masked, result_type)
-                    << bx.context.new_rvalue_from_int(result_type, shift));
-            shift += 1;
+                    << bx.context.new_rvalue_from_int(result_type, i as i32));
        }

-        match ret_ty.kind() {
+        match *ret_ty.kind() {
            ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {
                // Zero-extend iN to the bitmask type:
                return Ok(result);
            }
            ty::Array(elem, len)
-                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                if matches!(*elem.kind(), ty::Uint(ty::UintTy::U8))
                    && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
                        == Some(expected_bytes) =>
            {
@ -590,7 +725,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                return Err(());
            }};
        }
-        let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
+        let (elem_ty_str, elem_ty) = if let ty::Float(ref f) = *in_elem.kind() {
            let elem_ty = bx.cx.type_float_from_ty(*f);
            match f.bit_width() {
                32 => ("f", elem_ty),
@ -816,7 +951,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
        let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx());
        let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx());
        let (pointer_count, underlying_ty) = match *element_ty1.kind() {
-            ty::RawPtr(p_ty, _) if p_ty == in_elem => (ptr_count(element_ty1), non_ptr(element_ty1)),
+            ty::RawPtr(p_ty, _) if p_ty == in_elem => {
+                (ptr_count(element_ty1), non_ptr(element_ty1))
+            }
            _ => {
                require!(
                    false,
@ -839,7 +976,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(

        // The element type of the third argument must be a signed integer type of any width:
        let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx());
-        match element_ty2.kind() {
+        match *element_ty2.kind() {
            ty::Int(_) => (),
            _ => {
                require!(
@ -955,7 +1092,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
        assert_eq!(underlying_ty, non_ptr(element_ty0));

        // The element type of the third argument must be a signed integer type of any width:
-        match element_ty2.kind() {
+        match *element_ty2.kind() {
            ty::Int(_) => (),
            _ => {
                require!(
@ -1013,7 +1150,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
    macro_rules! arith_unary {
        ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
            $(if name == sym::$name {
-                match in_elem.kind() {
+                match *in_elem.kind() {
                    $($(ty::$p(_))|* => {
                        return Ok(bx.$call(args[0].immediate()))
                    })*
@ -1137,7 +1274,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                    ret_ty == in_elem,
                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                );
-                return match in_elem.kind() {
+                return match *in_elem.kind() {
                    ty::Int(_) | ty::Uint(_) => {
                        let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
                        if $ordered {
@ -1206,7 +1343,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                    ret_ty == in_elem,
                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                );
-                return match in_elem.kind() {
+                return match *in_elem.kind() {
                    ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
                    ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
@ -1235,7 +1372,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                    );
                    args[0].immediate()
                } else {
-                    match in_elem.kind() {
+                    match *in_elem.kind() {
                        ty::Int(_) | ty::Uint(_) => {}
                        _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                            span,
@ -1249,7 +1386,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(

                    args[0].immediate()
                };
-                return match in_elem.kind() {
+                return match *in_elem.kind() {
                    ty::Int(_) | ty::Uint(_) => {
                        let r = bx.vector_reduce_op(input, $op);
                        Ok(if !$boolean {
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@ -4,7 +4,7 @@
 * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html).
 * For Thin LTO, this might be helpful:
 * In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none.
- * Or the new incremental LTO?
+ * Or the new incremental LTO (https://www.phoronix.com/news/GCC-Incremental-LTO-Patches)?
 *
 * Maybe some missing optimizations enabled by rustc's LTO are in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
 * Like -fipa-icf (should be already enabled) and maybe -fdevirtualize-at-ltrans.
@ -16,12 +16,13 @@
 #![allow(internal_features)]
 #![doc(rust_logo)]
 #![feature(rustdoc_internals)]
-#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry)]
+#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry, let_chains)]
 #![allow(broken_intra_doc_links)]
 #![recursion_limit = "256"]
 #![warn(rust_2018_idioms)]
 #![warn(unused_lifetimes)]
 #![deny(clippy::pattern_type_mismatch)]
+#![allow(clippy::needless_lifetimes)]

 extern crate rustc_apfloat;
 extern crate rustc_ast;
@ -73,6 +74,7 @@ mod type_of;

 use std::any::Any;
 use std::fmt::Debug;
+use std::ops::Deref;
 #[cfg(not(feature = "master"))]
 use std::sync::atomic::AtomicBool;
 #[cfg(not(feature = "master"))]
@ -80,8 +82,9 @@ use std::sync::atomic::Ordering;
 use std::sync::Arc;
 use std::sync::Mutex;

+use back::lto::ThinBuffer;
+use back::lto::ThinData;
 use errors::LTONotSupported;
-#[cfg(not(feature = "master"))]
 use gccjit::CType;
 use gccjit::{Context, OptimizationLevel};
 #[cfg(feature = "master")]
@ -92,9 +95,7 @@ use rustc_codegen_ssa::back::write::{
    CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::base::codegen_crate;
-use rustc_codegen_ssa::traits::{
-    CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods,
-};
+use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods};
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::sync::IntoDynSyncSend;
@ -139,6 +140,10 @@ impl TargetInfo {
    fn supports_128bit_int(&self) -> bool {
        self.supports_128bit_integers.load(Ordering::SeqCst)
    }
+
+    fn supports_target_dependent_type(&self, _typ: CType) -> bool {
+        false
+    }
 }

 #[derive(Clone)]
@ -160,6 +165,10 @@ impl LockedTargetInfo {
    fn supports_128bit_int(&self) -> bool {
        self.info.lock().expect("lock").supports_128bit_int()
    }
+
+    /// Locks `self.info` and forwards the query for `typ` to the wrapped value's
+    /// `supports_target_dependent_type`.
+    ///
+    /// # Panics
+    ///
+    /// Panics with the message "lock" if `lock()` returns an error.
+    fn supports_target_dependent_type(&self, typ: CType) -> bool {
+        self.info.lock().expect("lock").supports_target_dependent_type(typ)
+    }
 }

 #[derive(Clone)]
@ -188,6 +197,7 @@ impl CodegenBackend for GccCodegenBackend {

        #[cfg(feature = "master")]
        gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
+
        if sess.lto() == Lto::Thin {
            sess.dcx().emit_warn(LTONotSupported {});
        }
@ -293,7 +303,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
        alloc_error_handler_kind: AllocatorKind,
    ) -> Self::Module {
        let mut mods = GccContext {
-            context: new_context(tcx),
+            context: Arc::new(SyncContext::new(new_context(tcx))),
            should_combine_object_files: false,
            temp_dir: None,
        };
@ -323,35 +333,42 @@ impl ExtraBackendMethods for GccCodegenBackend {
    }
 }

-pub struct ThinBuffer;
-
-impl ThinBufferMethods for ThinBuffer {
-    fn data(&self) -> &[u8] {
-        unimplemented!();
-    }
-
-    fn thin_link_data(&self) -> &[u8] {
-        unimplemented!();
-    }
-}
-
 pub struct GccContext {
-    context: Context<'static>,
+    // The libgccjit context, held through an `Arc` around the `SyncContext` wrapper instead of
+    // being stored directly.
+    context: Arc<SyncContext>,
    should_combine_object_files: bool,
    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
    temp_dir: Option<TempDir>,
 }

-unsafe impl Send for GccContext {}
-// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm". Try to disable it here.
-unsafe impl Sync for GccContext {}
+/// Wrapper around a `Context<'static>`; `GccContext` stores it behind an `Arc`.
+struct SyncContext {
+    context: Context<'static>,
+}
+
+impl SyncContext {
+    /// Wraps `context` without modifying it.
+    fn new(context: Context<'static>) -> Self {
+        Self { context }
+    }
+}
+
+// Dereferencing a `SyncContext` yields the wrapped `Context<'static>`, so the context's methods
+// can be called directly on the wrapper.
+impl Deref for SyncContext {
+    type Target = Context<'static>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.context
+    }
+}
+
+// NOTE(review): no safety argument for `Send` is visible in this hunk; confirm that the wrapped
+// `Context<'static>` may be moved to another thread.
+unsafe impl Send for SyncContext {}
+// FIXME(antoyo): this type should not be `Sync`. Parallel compilation is currently disabled with
+// "-Zno-parallel-llvm".
+// TODO: disable it here by returning false in CodegenBackend::supports_parallel().
+unsafe impl Sync for SyncContext {}

 impl WriteBackendMethods for GccCodegenBackend {
    type Module = GccContext;
    type TargetMachine = ();
    type TargetMachineError = ();
    type ModuleBuffer = ModuleBuffer;
-    type ThinData = ();
+    type ThinData = ThinData;
    type ThinBuffer = ThinBuffer;

    fn run_fat_lto(
@ -363,11 +380,11 @@ impl WriteBackendMethods for GccCodegenBackend {
    }

    fn run_thin_lto(
-        _cgcx: &CodegenContext<Self>,
-        _modules: Vec<(String, Self::ThinBuffer)>,
-        _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
+        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
    ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
-        unimplemented!();
+        // Thin LTO is implemented in `back::lto`; all three arguments are passed through unchanged.
+        back::lto::run_thin(cgcx, modules, cached_modules)
    }

    fn print_pass_timings(&self) {
@ -397,10 +414,10 @@ impl WriteBackendMethods for GccCodegenBackend {
    }

    unsafe fn optimize_thin(
-        _cgcx: &CodegenContext<Self>,
-        _thin: ThinModule<Self>,
+        cgcx: &CodegenContext<Self>,
+        thin: ThinModule<Self>,
    ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
-        unimplemented!();
+        // Delegates to `back::lto`; note that the helper takes its arguments in the opposite
+        // order (`thin` first, then `cgcx`).
+        // NOTE(review): the safety contract of this `unsafe fn` is not visible here.
+        back::lto::optimize_thin_module(thin, cgcx)
    }

    unsafe fn codegen(
@ -413,10 +430,10 @@ impl WriteBackendMethods for GccCodegenBackend {
    }

    fn prepare_thin(
-        _module: ModuleCodegen<Self::Module>,
-        _emit_summary: bool,
+        module: ModuleCodegen<Self::Module>,
+        emit_summary: bool,
    ) -> (String, Self::ThinBuffer) {
-        unimplemented!();
+        // Delegates to `back::lto::prepare_thin`, passing both arguments through unchanged.
+        back::lto::prepare_thin(module, emit_summary)
    }

    fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
@ -437,7 +454,8 @@ impl WriteBackendMethods for GccCodegenBackend {
 pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
    #[cfg(feature = "master")]
    let info = {
-        // Check whether the target supports 128-bit integers.
+        // Check whether the target supports 128-bit integers, and sized floating point types (like
+        // Float16).
        let context = Context::default();
        Arc::new(Mutex::new(IntoDynSyncSend(context.get_target_info())))
    };
@ -467,6 +485,7 @@ pub fn target_features(
    allow_unstable: bool,
    target_info: &LockedTargetInfo,
 ) -> Vec<Symbol> {
+    // TODO(antoyo): use global_gcc_features.
    sess.target
        .supported_target_features()
        .iter()
@ -477,8 +496,12 @@ pub fn target_features(
                None
            }
        })
-        .filter(|_feature| {
-            target_info.cpu_supports(_feature)
+        .filter(|feature| {
+            // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
+            if *feature == "neon" {
+                return false;
+            }
+            target_info.cpu_supports(feature)
            /*
              adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
              avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
--- a/compiler/rustc_codegen_gcc/src/mono_item.rs
+++ b/compiler/rustc_codegen_gcc/src/mono_item.rs
@ -81,6 +81,6 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
        // TODO(antoyo): use inline attribute from there in linkage.set() above.

        self.functions.borrow_mut().insert(symbol_name.to_string(), decl);
-        self.function_instances.borrow_mut().insert(instance, unsafe { std::mem::transmute(decl) });
+        self.function_instances.borrow_mut().insert(instance, decl);
    }
 }
--- a/compiler/rustc_codegen_gcc/src/type_.rs
+++ b/compiler/rustc_codegen_gcc/src/type_.rs
@ -1,3 +1,8 @@
+#[cfg(feature = "master")]
+use std::convert::TryInto;
+
+#[cfg(feature = "master")]
+use gccjit::CType;
 use gccjit::{RValue, Struct, Type};
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods, TypeMembershipMethods};
@ -142,25 +147,76 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
    }

    fn type_f16(&self) -> Type<'gcc> {
-        unimplemented!("f16_f128")
+        // `CType::Float16` is only returned when built with the `master` feature and
+        // `supports_f16_type` is set; every other case falls through to `bug!`.
+        #[cfg(feature = "master")]
+        if self.supports_f16_type {
+            return self.context.new_c_type(CType::Float16);
+        }
+        bug!("unsupported float width 16")
    }

    fn type_f32(&self) -> Type<'gcc> {
+        // Use `CType::Float32` when built with the `master` feature and `supports_f32_type` is
+        // set; otherwise fall back to `self.float_type`.
+        #[cfg(feature = "master")]
+        if self.supports_f32_type {
+            return self.context.new_c_type(CType::Float32);
+        }
        self.float_type
    }

    fn type_f64(&self) -> Type<'gcc> {
+        // Use `CType::Float64` when built with the `master` feature and `supports_f64_type` is
+        // set; otherwise fall back to `self.double_type`.
+        #[cfg(feature = "master")]
+        if self.supports_f64_type {
+            return self.context.new_c_type(CType::Float64);
+        }
        self.double_type
    }

    fn type_f128(&self) -> Type<'gcc> {
-        unimplemented!("f16_f128")
+        // `CType::Float128` is only returned when built with the `master` feature and
+        // `supports_f128_type` is set; every other case falls through to `bug!`.
+        #[cfg(feature = "master")]
+        if self.supports_f128_type {
+            return self.context.new_c_type(CType::Float128);
+        }
+        bug!("unsupported float width 128")
    }

    fn type_func(&self, params: &[Type<'gcc>], return_type: Type<'gcc>) -> Type<'gcc> {
        self.context.new_function_pointer_type(None, return_type, params, false)
    }

+    /// Classifies the GCC type `typ` as a `TypeKind` (variant compiled with the `master` feature).
+    ///
+    /// The checks run in the order written and the first one that matches decides the result.
+    ///
+    /// # Panics
+    ///
+    /// Hits `unimplemented!()` when no check matches, and `unreachable!` for a floating-point
+    /// type whose size is not 2, 4, 8 or 16.
+    #[cfg(feature = "master")]
+    fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
+        if self.is_int_type_or_bool(typ) {
+            TypeKind::Integer
+        } else if typ.get_pointee().is_some() {
+            TypeKind::Pointer
+        } else if typ.is_vector() {
+            TypeKind::Vector
+        } else if typ.dyncast_array().is_some() {
+            TypeKind::Array
+        } else if typ.is_struct().is_some() {
+            TypeKind::Struct
+        } else if typ.dyncast_function_ptr_type().is_some() {
+            TypeKind::Function
+        // Types compatible with `float_type` / `double_type` are matched here, so the size-based
+        // match below is only reached by the remaining floating-point types.
+        } else if typ.is_compatible_with(self.float_type) {
+            TypeKind::Float
+        } else if typ.is_compatible_with(self.double_type) {
+            TypeKind::Double
+        } else if typ.is_floating_point() {
+            // Pick the kind from the value of `get_size()` (presumably bytes; confirm the unit).
+            match typ.get_size() {
+                2 => TypeKind::Half,
+                4 => TypeKind::Float,
+                8 => TypeKind::Double,
+                16 => TypeKind::FP128,
+                size => unreachable!("Floating-point type of size {}", size),
+            }
+        } else if typ == self.type_void() {
+            TypeKind::Void
+        } else {
+            // TODO(antoyo): support other types.
+            unimplemented!();
+        }
+    }
+
+    #[cfg(not(feature = "master"))]
    fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
        if self.is_int_type_or_bool(typ) {
            TypeKind::Integer
@ -170,9 +226,19 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
            TypeKind::Double
        } else if typ.is_vector() {
            TypeKind::Vector
+        } else if typ.get_pointee().is_some() {
+            TypeKind::Pointer
+        } else if typ.dyncast_array().is_some() {
+            TypeKind::Array
+        } else if typ.is_struct().is_some() {
+            TypeKind::Struct
+        } else if typ.dyncast_function_ptr_type().is_some() {
+            TypeKind::Function
+        } else if typ == self.type_void() {
+            TypeKind::Void
        } else {
            // TODO(antoyo): support other types.
-            TypeKind::Void
+            unimplemented!();
        }
    }

@ -200,6 +266,16 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
        unimplemented!();
    }

+    /// Returns the width of the floating-point type `typ`, computed as
+    /// `typ.get_size() * u8::BITS` (presumably bytes converted to bits; confirm the unit of
+    /// `get_size`). This variant is compiled with the `master` feature.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `typ` is not a floating-point type, or if the product cannot be converted to
+    /// `usize`.
+    #[cfg(feature = "master")]
+    fn float_width(&self, typ: Type<'gcc>) -> usize {
+        if typ.is_floating_point() {
+            (typ.get_size() * u8::BITS).try_into().unwrap()
+        } else {
+            panic!("Cannot get width of float type {:?}", typ);
+        }
+    }
+
+    #[cfg(not(feature = "master"))]
    fn float_width(&self, typ: Type<'gcc>) -> usize {
        let f32 = self.context.new_type::<f32>();
        let f64 = self.context.new_type::<f64>();
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@ -8,7 +8,7 @@ use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_middle::ty::{self, CoroutineArgsExt, Ty, TypeVisitableExt};
 use rustc_target::abi::call::{CastTarget, FnAbi, Reg};
 use rustc_target::abi::{
-    self, Abi, Align, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface,
+    self, Abi, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface,
    Variants,
 };

@ -53,12 +53,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
    }
 }

-impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
-    pub fn align_of(&self, ty: Ty<'tcx>) -> Align {
-        self.layout_of(ty).align.abi
-    }
-}
-
 fn uncached_gcc_type<'gcc, 'tcx>(
    cx: &CodegenCx<'gcc, 'tcx>,
    layout: TyAndLayout<'tcx>,
@ -90,7 +84,7 @@ fn uncached_gcc_type<'gcc, 'tcx>(
        Abi::Uninhabited | Abi::Aggregate { .. } => {}
    }

-    let name = match layout.ty.kind() {
+    let name = match *layout.ty.kind() {
        // FIXME(eddyb) producing readable type names for trait objects can result
        // in problematically distinct types due to HRTB and subtyping (see #47638).
        // ty::Dynamic(..) |
@ -220,7 +214,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
                // to fn_ptr_backend_type handle the on-stack attribute.
                // TODO(antoyo): find a less hackish way to handle the on-stack attribute.
                ty::FnPtr(sig) => {
-                    cx.fn_ptr_backend_type(&cx.fn_abi_of_fn_ptr(sig, ty::List::empty()))
+                    cx.fn_ptr_backend_type(cx.fn_abi_of_fn_ptr(sig, ty::List::empty()))
                }
                _ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),
            };