From 7a01c7f676cbffba4f14540ae90e4fd82f7b0af1 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 28 Aug 2025 13:01:40 +0000
Subject: [PATCH 1/3] Export __rdl_* symbols to the allocator shim when doing
 LTO

---
 compiler/rustc_codegen_ssa/src/back/lto.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/compiler/rustc_codegen_ssa/src/back/lto.rs b/compiler/rustc_codegen_ssa/src/back/lto.rs
index e6df6a2469f37..f4a9037940a67 100644
--- a/compiler/rustc_codegen_ssa/src/back/lto.rs
+++ b/compiler/rustc_codegen_ssa/src/back/lto.rs
@@ -1,6 +1,7 @@
 use std::ffi::CString;
 use std::sync::Arc;
 
+use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_data_structures::memmap::Mmap;
 use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
 use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel};
@@ -95,6 +96,19 @@ pub(super) fn exported_symbols_for_lto(
         .filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| {
             if info.level.is_below_threshold(export_threshold) || info.used {
                 Some(symbol_name_for_instance_in_crate(tcx, s, cnum))
+            } else if export_threshold == SymbolExportLevel::C
+                && info.rustc_std_internal_symbol
+                && let Some(AllocatorKind::Default) = allocator_kind_for_codegen(tcx)
+            {
+                // Export the __rdl_* symbols for use by the allocator shim when not using
+                // #[global_allocator]. Most of the conditions above only serve to avoid
+                // unnecessary, expensive symbol_name_for_instance_in_crate calls.
+                let sym = symbol_name_for_instance_in_crate(tcx, s, cnum);
+                if sym.contains("__rdl_") || sym.contains("__rg_oom") {
+                    Some(sym)
+                } else {
+                    None
+                }
             } else {
                 None
             }

From eea81b5d752f5310fcbd5ddae722afc7de0b8fa1 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 28 Aug 2025 09:54:42 +0000
Subject: [PATCH 2/3] Ensure the allocator shim never participates in LTO

Making the allocator shim participate in LTO would be incorrect when a
crate is compiled as both a dylib (which needs the shim) and an rlib
(which must not include it) in the same rustc invocation. With linker
plugin LTO the allocator shim does still participate in LTO, as doing
so is safe in that case.

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 20 +++++++++++---------
 compiler/rustc_codegen_ssa/src/base.rs       | 11 ++---------
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 8586615f7c764..d6f289b3eee71 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -809,19 +809,12 @@ pub(crate) fn compute_per_cgu_lto_type(
     sess_lto: &Lto,
     opts: &config::Options,
     sess_crate_types: &[CrateType],
-    module_kind: ModuleKind,
 ) -> ComputedLtoType {
     // If the linker does LTO, we don't have to do it. Note that we
     // keep doing full LTO, if it is requested, as not to break the
     // assumption that the output will be a single module.
     let linker_does_lto = opts.cg.linker_plugin_lto.enabled();
 
-    // When we're automatically doing ThinLTO for multi-codegen-unit
-    // builds we don't actually want to LTO the allocator modules if
-    // it shows up. This is due to various linker shenanigans that
-    // we'll encounter later.
-    let is_allocator = module_kind == ModuleKind::Allocator;
-
     // We ignore a request for full crate graph LTO if the crate type
     // is only an rlib, as there is no full crate graph to process,
     // that'll happen later.
@@ -833,7 +826,7 @@ pub(crate) fn compute_per_cgu_lto_type(
     let is_rlib = matches!(sess_crate_types, [CrateType::Rlib]);
 
     match sess_lto {
-        Lto::ThinLocal if !linker_does_lto && !is_allocator => ComputedLtoType::Thin,
+        Lto::ThinLocal if !linker_does_lto => ComputedLtoType::Thin,
         Lto::Thin if !linker_does_lto && !is_rlib => ComputedLtoType::Thin,
         Lto::Fat if !is_rlib => ComputedLtoType::Fat,
         _ => ComputedLtoType::No,
@@ -855,7 +848,16 @@ fn execute_optimize_work_item(
     // back to the coordinator thread for further LTO processing (which
     // has to wait for all the initial modules to be optimized).
 
-    let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types, module.kind);
+    // When we're automatically doing ThinLTO for multi-codegen-unit
+    // builds we don't actually want to LTO the allocator module if it
+    // shows up. This is due to various linker shenanigans that we'll
+    // encounter later.
+    if module.kind == ModuleKind::Allocator {
+        let module = B::codegen(cgcx, module, module_config);
+        return WorkItemResult::Finished(module);
+    }
+
+    let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types);
 
     // If we're doing some form of incremental LTO then we need to be sure to
     // save our module to disk first.

diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 97cdf8b697348..071bd09249ae3 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -46,9 +46,7 @@ use crate::meth::load_vtable;
 use crate::mir::operand::OperandValue;
 use crate::mir::place::PlaceRef;
 use crate::traits::*;
-use crate::{
-    CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, ModuleKind, errors, meth, mir,
-};
+use crate::{CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, errors, meth, mir};
 
 pub(crate) fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate {
     match (op, signed) {
@@ -1135,12 +1133,7 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) ->
     // We can re-use either the pre- or the post-thinlto state. If no LTO is
     // being performed then we can use post-LTO artifacts, otherwise we must
     // reuse pre-LTO artifacts
-    match compute_per_cgu_lto_type(
-        &tcx.sess.lto(),
-        &tcx.sess.opts,
-        tcx.crate_types(),
-        ModuleKind::Regular,
-    ) {
+    match compute_per_cgu_lto_type(&tcx.sess.lto(), &tcx.sess.opts, tcx.crate_types()) {
         ComputedLtoType::No => CguReuse::PostLto,
         _ => CguReuse::PreLto,
     }

From 319fe230f0d960b343be31a1182dc0f10753156c Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 28 Aug 2025 10:26:29 +0000
Subject: [PATCH 3/3] Special case allocator module submission to avoid
 special casing it elsewhere

A lot of places had special handling just in case they would get an
allocator module, even though most of them could never get one, or
needed only a trivial implementation for it. Moving all handling of the
allocator module to a single place simplifies things a fair bit.
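
For illustration, the consolidated flow reduces to roughly the
following shape (a simplified sketch, not the literal code: parameter
lists, error handling and the CGU name computation are elided, and the
two halves live in base.rs and write.rs respectively):

    // base.rs: build the allocator shim up front, if one is needed,
    // and hand it to the codegen coordinator with the regular CGUs.
    // (llmod_id comes from cgu_name_builder, elided here.)
    let allocator_module = allocator_kind_for_codegen(tcx).map(|kind| {
        let module_llvm = backend.codegen_allocator(
            tcx,
            &llmod_id,
            kind,
            tcx.alloc_error_handler_kind(()).unwrap(),
        );
        ModuleCodegen::new_allocator(llmod_id, module_llvm)
    });
    let ongoing_codegen =
        start_async_codegen(backend.clone(), tcx, target_cpu, allocator_module);

    // write.rs: the coordinator optimizes and codegens the shim
    // directly, so it never enters the work queue and thus never
    // participates in (Thin)LTO.
    let compiled_allocator_module = allocator_module.map(|mut m| {
        B::optimize(&cgcx, tcx.sess.dcx(), &mut m, &allocator_config);
        B::codegen(&cgcx, m, &allocator_config)
    });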
---
 .../rustc_codegen_cranelift/src/driver/aot.rs |  11 +-
 compiler/rustc_codegen_gcc/src/back/lto.rs    |   2 +-
 compiler/rustc_codegen_llvm/src/back/lto.rs   |  22 +--
 compiler/rustc_codegen_ssa/src/back/write.rs  | 135 +++++++-----------
 compiler/rustc_codegen_ssa/src/base.rs        |  53 +++----
 compiler/rustc_codegen_ssa/src/lib.rs         |   2 -
 6 files changed, 79 insertions(+), 146 deletions(-)

diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index 7e77781dc2fc1..c3adb5e767e25 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -12,9 +12,7 @@ use cranelift_object::{ObjectBuilder, ObjectModule};
 use rustc_codegen_ssa::assert_module_sources::CguReuse;
 use rustc_codegen_ssa::back::link::ensure_removed;
 use rustc_codegen_ssa::base::determine_cgu_reuse;
-use rustc_codegen_ssa::{
-    CodegenResults, CompiledModule, CrateInfo, ModuleKind, errors as ssa_errors,
-};
+use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, errors as ssa_errors};
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{IntoDynSyncSend, par_map};
@@ -363,7 +361,6 @@ fn emit_cgu(
         invocation_temp,
         prof,
         product.object,
-        ModuleKind::Regular,
         name.clone(),
         producer,
     )?;
@@ -372,7 +369,6 @@ fn emit_cgu(
         module_regular,
         module_global_asm: global_asm_object_file.map(|global_asm_object_file| CompiledModule {
             name: format!("{name}.asm"),
-            kind: ModuleKind::Regular,
             object: Some(global_asm_object_file),
             dwarf_object: None,
             bytecode: None,
@@ -389,7 +385,6 @@ fn emit_module(
     invocation_temp: Option<&str>,
     prof: &SelfProfilerRef,
     mut object: cranelift_object::object::write::Object<'_>,
-    kind: ModuleKind,
     name: String,
     producer_str: &str,
 ) -> Result<CompiledModule, String> {
@@ -430,7 +425,6 @@ fn emit_module(
 
     Ok(CompiledModule {
         name,
-        kind,
         object: Some(tmp_file),
         dwarf_object: None,
         bytecode: None,
@@ -485,7 +479,6 @@ fn reuse_workproduct_for_cgu(
     Ok(ModuleCodegenResult {
         module_regular: CompiledModule {
             name: cgu.name().to_string(),
-            kind: ModuleKind::Regular,
             object: Some(obj_out_regular),
             dwarf_object: None,
             bytecode: None,
@@ -495,7 +488,6 @@ fn reuse_workproduct_for_cgu(
         },
         module_global_asm: source_file_global_asm.map(|source_file| CompiledModule {
             name: cgu.name().to_string(),
-            kind: ModuleKind::Regular,
             object: Some(obj_out_global_asm),
             dwarf_object: None,
             bytecode: None,
@@ -651,7 +643,6 @@ fn emit_allocator_module(tcx: TyCtxt<'_>) -> Option<CompiledModule> {
         tcx.sess.invocation_temp.as_deref(),
         &tcx.sess.prof,
         product.object,
-        ModuleKind::Allocator,
         "allocator_shim".to_owned(),
         &crate::debuginfo::producer(tcx.sess),
     ) {
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
index fcee6b6df6234..9d8ce2383f2b1 100644
--- a/compiler/rustc_codegen_gcc/src/back/lto.rs
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -204,7 +204,7 @@ fn fat_lto(
         let path = tmp_path.path().to_path_buf().join(&module.name);
         let path = path.to_str().expect("path");
         let context = &module.module_llvm.context;
-        let config = cgcx.config(module.kind);
+        let config = &cgcx.module_config;
         // NOTE: we need to set the optimization level here in order for LTO to do its job.
         context.set_optimization_level(to_gcc_opt_level(config.opt_level));
         context.add_command_line_option("-flto=auto");
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index fc38c4f3e5138..326b876e7e689 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -11,7 +11,7 @@ use object::{Object, ObjectSection};
 use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
+use rustc_codegen_ssa::{ModuleCodegen, looks_like_rust_object_file};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::memmap::Mmap;
 use rustc_errors::DiagCtxtHandle;
@@ -43,9 +43,7 @@ fn prepare_lto(
         .map(|symbol| CString::new(symbol.to_owned()).unwrap())
         .collect::<Vec<CString>>();
 
-    if cgcx.regular_module_config.instrument_coverage
-        || cgcx.regular_module_config.pgo_gen.enabled()
-    {
+    if cgcx.module_config.instrument_coverage || cgcx.module_config.pgo_gen.enabled() {
         // These are weak symbols that point to the profile version and the
         // profile name, which need to be treated as exported so LTO doesn't nix
         // them.
@@ -55,15 +53,15 @@ fn prepare_lto(
         symbols_below_threshold.extend(PROFILER_WEAK_SYMBOLS.iter().map(|&sym| sym.to_owned()));
     }
 
-    if cgcx.regular_module_config.sanitizer.contains(SanitizerSet::MEMORY) {
+    if cgcx.module_config.sanitizer.contains(SanitizerSet::MEMORY) {
         let mut msan_weak_symbols = Vec::new();
 
         // Similar to profiling, preserve weak msan symbol during LTO.
-        if cgcx.regular_module_config.sanitizer_recover.contains(SanitizerSet::MEMORY) {
+        if cgcx.module_config.sanitizer_recover.contains(SanitizerSet::MEMORY) {
             msan_weak_symbols.push(c"__msan_keep_going");
         }
 
-        if cgcx.regular_module_config.sanitizer_memory_track_origins != 0 {
+        if cgcx.module_config.sanitizer_memory_track_origins != 0 {
             msan_weak_symbols.push(c"__msan_track_origins");
         }
@@ -227,15 +225,9 @@ fn fat_lto(
     // All the other modules will be serialized and reparsed into the new
     // context, so this hopefully avoids serializing and parsing the largest
     // codegen unit.
-    //
-    // Additionally use a regular module as the base here to ensure that various
-    // file copy operations in the backend work correctly. The only other kind
-    // of module here should be an allocator one, and if your crate is smaller
-    // than the allocator module then the size doesn't really matter anyway.
     let costliest_module = in_memory
         .iter()
         .enumerate()
-        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
         .map(|(i, module)| {
             let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
             (cost, i)
         })
@@ -583,7 +575,7 @@ pub(crate) fn run_pass_manager(
     thin: bool,
 ) {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_lto_optimize", &*module.name);
-    let config = cgcx.config(module.kind);
+    let config = &cgcx.module_config;
 
     // Now we have one massive module inside of llmod. Time to run the
     // LTO-specific optimization passes that LLVM provides.
@@ -745,7 +737,7 @@ pub(crate) fn optimize_thin_module(
     let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx);
     let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
     // Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
-    if cgcx.config(ModuleKind::Regular).embed_bitcode() {
+    if cgcx.module_config.embed_bitcode() {
         module.thin_lto_buffer = Some(thin_module.data().to_vec());
     }
     {
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index d6f289b3eee71..f637e7f58dbf7 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -333,8 +333,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
     pub crate_types: Vec<CrateType>,
     pub output_filenames: Arc<OutputFilenames>,
     pub invocation_temp: Option<String>,
-    pub regular_module_config: Arc<ModuleConfig>,
-    pub allocator_module_config: Arc<ModuleConfig>,
+    pub module_config: Arc<ModuleConfig>,
     pub tm_factory: TargetMachineFactoryFn<B>,
     pub msvc_imps_needed: bool,
     pub is_pe_coff: bool,
@@ -372,13 +371,6 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
     pub fn create_dcx(&self) -> DiagCtxt {
         DiagCtxt::new(Box::new(self.diag_emitter.clone()))
     }
-
-    pub fn config(&self, kind: ModuleKind) -> &ModuleConfig {
-        match kind {
-            ModuleKind::Regular => &self.regular_module_config,
-            ModuleKind::Allocator => &self.allocator_module_config,
-        }
-    }
 }
 
 fn generate_thin_lto_work<B: ExtraBackendMethods>(
@@ -442,6 +434,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
     backend: B,
     tcx: TyCtxt<'_>,
     target_cpu: String,
+    allocator_module: Option<ModuleCodegen<B::Module>>,
 ) -> OngoingCodegen<B> {
     let (coordinator_send, coordinator_receive) = channel();
 
@@ -465,6 +458,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
         coordinator_receive,
         Arc::new(regular_config),
         Arc::new(allocator_config),
+        allocator_module,
         coordinator_send.clone(),
     );
 
@@ -495,7 +489,7 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
 
     let _timer = sess.timer("copy_all_cgu_workproducts_to_incr_comp_cache_dir");
 
-    for module in compiled_modules.modules.iter().filter(|m| m.kind == ModuleKind::Regular) {
+    for module in &compiled_modules.modules {
         let mut files = Vec::new();
         if let Some(object_file_path) = &module.object {
             files.push((OutputType::Object.extension(), object_file_path.as_path()));
@@ -720,15 +714,6 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
 }
 
 impl<B: WriteBackendMethods> WorkItem<B> {
-    fn module_kind(&self) -> ModuleKind {
-        match *self {
-            WorkItem::Optimize(ref m) => m.kind,
-            WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
-                ModuleKind::Regular
-            }
-        }
-    }
-
     /// Generate a short description of this work item suitable for use as a thread name.
     fn short_description(&self) -> String {
         // `pthread_setname()` on *nix ignores anything beyond the first 15
@@ -836,32 +821,22 @@ pub(crate) fn compute_per_cgu_lto_type(
 fn execute_optimize_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     mut module: ModuleCodegen<B::Module>,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     let dcx = cgcx.create_dcx();
     let dcx = dcx.handle();
 
-    B::optimize(cgcx, dcx, &mut module, module_config);
+    B::optimize(cgcx, dcx, &mut module, &cgcx.module_config);
 
     // After we've done the initial round of optimizations we need to
     // decide whether to synchronously codegen this module or ship it
     // back to the coordinator thread for further LTO processing (which
     // has to wait for all the initial modules to be optimized).
 
-    // When we're automatically doing ThinLTO for multi-codegen-unit
-    // builds we don't actually want to LTO the allocator modules if
-    // it shows up. This is due to various linker shenanigans that
-    // we'll encounter later.
-    if module.kind == ModuleKind::Allocator {
-        let module = B::codegen(cgcx, module, module_config);
-        return WorkItemResult::Finished(module);
-    }
-
     let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types);
 
     // If we're doing some form of incremental LTO then we need to be sure to
     // save our module to disk first.
-    let bitcode = if cgcx.config(module.kind).emit_pre_lto_bc {
+    let bitcode = if cgcx.module_config.emit_pre_lto_bc {
         let filename = pre_lto_bitcode_filename(&module.name);
         cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename))
     } else {
@@ -870,7 +845,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
 
     match lto_type {
         ComputedLtoType::No => {
-            let module = B::codegen(cgcx, module, module_config);
+            let module = B::codegen(cgcx, module, &cgcx.module_config);
             WorkItemResult::Finished(module)
         }
         ComputedLtoType::Thin => {
@@ -901,7 +876,6 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: CachedModuleCodegen,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     let incr_comp_session_dir = cgcx.incr_comp_session_dir.as_ref().unwrap();
 
@@ -961,6 +935,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
         }
     };
 
+    let module_config = &cgcx.module_config;
     let should_emit_obj = module_config.emit_obj != EmitObj::None;
     let assembly = load_from_incr_cache(module_config.emit_asm, OutputType::Assembly);
     let llvm_ir = load_from_incr_cache(module_config.emit_ir, OutputType::LlvmAssembly);
@@ -973,7 +948,6 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     WorkItemResult::Finished(CompiledModule {
         links_from_incr_cache,
         name: module.name,
-        kind: ModuleKind::Regular,
         object,
         dwarf_object,
         bytecode,
@@ -988,7 +962,6 @@ fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
     each_linked_rlib_for_lto: &[PathBuf],
     mut needs_fat_lto: Vec<FatLtoInput<B>>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     for (module, wp) in import_only_modules {
         needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module })
@@ -1000,17 +973,16 @@ fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
         each_linked_rlib_for_lto,
         needs_fat_lto,
     );
-    let module = B::codegen(cgcx, module, module_config);
+    let module = B::codegen(cgcx, module, &cgcx.module_config);
     WorkItemResult::Finished(module)
 }
 
 fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: lto::ThinModule<B>,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     let module = B::optimize_thin(cgcx, module);
-    let module = B::codegen(cgcx, module, module_config);
+    let module = B::codegen(cgcx, module, &cgcx.module_config);
     WorkItemResult::Finished(module)
 }
 
@@ -1095,6 +1067,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
     coordinator_receive: Receiver<Box<dyn Any + Send>>,
     regular_config: Arc<ModuleConfig>,
     allocator_config: Arc<ModuleConfig>,
+    allocator_module: Option<ModuleCodegen<B::Module>>,
     tx_to_llvm_workers: Sender<Box<dyn Any + Send>>,
 ) -> thread::JoinHandle<Result<CompiledModules, ()>> {
     let coordinator_send = tx_to_llvm_workers;
@@ -1159,8 +1132,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         expanded_args: tcx.sess.expanded_args.clone(),
         diag_emitter: shared_emitter.clone(),
         output_filenames: Arc::clone(tcx.output_filenames(())),
-        regular_module_config: regular_config,
-        allocator_module_config: allocator_config,
+        module_config: regular_config,
         tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features),
         msvc_imps_needed: msvc_imps_needed(tcx),
         is_pe_coff: tcx.sess.target.is_like_windows,
@@ -1175,6 +1147,11 @@ fn start_executing_work<B: ExtraBackendMethods>(
         invocation_temp: sess.invocation_temp.clone(),
     };
 
+    let compiled_allocator_module = allocator_module.map(|mut allocator_module| {
+        B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config);
+        B::codegen(&cgcx, allocator_module, &allocator_config)
+    });
+
     // This is the "main loop" of parallel work happening for parallel codegen.
     // It's here that we manage parallelism, schedule work, and work with
     // messages coming from clients.
@@ -1314,7 +1291,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // This is where we collect codegen units that have gone all the way
         // through codegen and LLVM.
         let mut compiled_modules = vec![];
-        let mut compiled_allocator_module = None;
         let mut needs_fat_lto = Vec::new();
         let mut needs_thin_lto = Vec::new();
         let mut lto_import_only_modules = Vec::new();
@@ -1588,15 +1564,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
                     match result {
                         Ok(WorkItemResult::Finished(compiled_module)) => {
-                            match compiled_module.kind {
-                                ModuleKind::Regular => {
-                                    compiled_modules.push(compiled_module);
-                                }
-                                ModuleKind::Allocator => {
-                                    assert!(compiled_allocator_module.is_none());
-                                    compiled_allocator_module = Some(compiled_module);
-                                }
-                            }
+                            compiled_modules.push(compiled_module);
                         }
                         Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
                             assert!(!started_lto);
@@ -1724,45 +1692,38 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
     let cgcx = cgcx.clone();
 
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
-        let result = std::panic::catch_unwind(AssertUnwindSafe(|| {
-            let module_config = cgcx.config(work.module_kind());
-
-            match work {
-                WorkItem::Optimize(m) => {
-                    let _timer =
-                        cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*m.name);
-                    execute_optimize_work_item(&cgcx, m, module_config)
-                }
-                WorkItem::CopyPostLtoArtifacts(m) => {
-                    let _timer = cgcx.prof.generic_activity_with_arg(
-                        "codegen_copy_artifacts_from_incr_cache",
-                        &*m.name,
-                    );
-                    execute_copy_from_cache_work_item(&cgcx, m, module_config)
-                }
-                WorkItem::FatLto {
-                    exported_symbols_for_lto,
-                    each_linked_rlib_for_lto,
-                    needs_fat_lto,
-                    import_only_modules,
-                } => {
-                    let _timer = cgcx
-                        .prof
-                        .generic_activity_with_arg("codegen_module_perform_lto", "everything");
-                    execute_fat_lto_work_item(
-                        &cgcx,
-                        &exported_symbols_for_lto,
-                        &each_linked_rlib_for_lto,
-                        needs_fat_lto,
-                        import_only_modules,
-                        module_config,
-                    )
-                }
-                WorkItem::ThinLto(m) => {
-                    let _timer =
-                        cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
-                    execute_thin_lto_work_item(&cgcx, m, module_config)
-                }
+        let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
+            WorkItem::Optimize(m) => {
+                let _timer =
+                    cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*m.name);
+                execute_optimize_work_item(&cgcx, m)
+            }
+            WorkItem::CopyPostLtoArtifacts(m) => {
+                let _timer = cgcx
+                    .prof
+                    .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*m.name);
+                execute_copy_from_cache_work_item(&cgcx, m)
+            }
+            WorkItem::FatLto {
+                exported_symbols_for_lto,
+                each_linked_rlib_for_lto,
+                needs_fat_lto,
+                import_only_modules,
+            } => {
+                let _timer =
+                    cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything");
+                execute_fat_lto_work_item(
+                    &cgcx,
+                    &exported_symbols_for_lto,
+                    &each_linked_rlib_for_lto,
+                    needs_fat_lto,
+                    import_only_modules,
+                )
+            }
+            WorkItem::ThinLto(m) => {
+                let _timer =
+                    cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
+                execute_thin_lto_work_item(&cgcx, m)
+            }
         }));
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 071bd09249ae3..a9a2ae1b3dbca 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -662,7 +662,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 ) -> OngoingCodegen<B> {
     // Skip crate items and just output metadata in -Z no-codegen mode.
     if tcx.sess.opts.unstable_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() {
-        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu);
+        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, None);
 
         ongoing_codegen.codegen_finished(tcx);
 
@@ -693,7 +693,27 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         }
     }
 
-    let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu);
+    // Codegen an allocator shim, if necessary.
+    let allocator_module = if let Some(kind) = allocator_kind_for_codegen(tcx) {
+        let llmod_id =
+            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
+
+        tcx.sess.time("write_allocator_module", || {
+            let module = backend.codegen_allocator(
+                tcx,
+                &llmod_id,
+                kind,
+                // If allocator_kind is Some then alloc_error_handler_kind must
+                // also be Some.
+                tcx.alloc_error_handler_kind(()).unwrap(),
+            );
+            Some(ModuleCodegen::new_allocator(llmod_id, module))
+        })
+    } else {
+        None
+    };
+
+    let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu, allocator_module);
 
     // For better throughput during parallel processing by LLVM, we used to sort
     // CGUs largest to smallest. This would lead to better thread utilization
@@ -810,35 +830,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         }
     }
 
-    // Codegen an allocator shim, if necessary.
-    // Do this last to ensure the LLVM_passes timer doesn't start while no CGUs have been codegened
-    // yet for the backend to optimize.
-    if let Some(kind) = allocator_kind_for_codegen(tcx) {
-        let llmod_id =
-            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
-        let module_llvm = tcx.sess.time("write_allocator_module", || {
-            backend.codegen_allocator(
-                tcx,
-                &llmod_id,
-                kind,
-                // If allocator_kind is Some then alloc_error_handler_kind must
-                // also be Some.
-                tcx.alloc_error_handler_kind(()).unwrap(),
-            )
-        });
-
-        ongoing_codegen.wait_for_signal_to_codegen_item();
-        ongoing_codegen.check_for_errors(tcx.sess);
-
-        // These modules are generally cheap and won't throw off scheduling.
-        let cost = 0;
-        submit_codegened_module_to_llvm(
-            &ongoing_codegen.coordinator,
-            ModuleCodegen::new_allocator(llmod_id, module_llvm),
-            cost,
-        );
-    }
-
     ongoing_codegen.codegen_finished(tcx);
 
     // Since the main thread is sometimes blocked during codegen, we keep track
diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs
index fe0500a5d4cf7..5b90ffaa05666 100644
--- a/compiler/rustc_codegen_ssa/src/lib.rs
+++ b/compiler/rustc_codegen_ssa/src/lib.rs
@@ -120,7 +120,6 @@ impl<M> ModuleCodegen<M> {
 
         CompiledModule {
             name: self.name.clone(),
-            kind: self.kind,
             object,
             dwarf_object,
             bytecode,
@@ -134,7 +133,6 @@ impl<M> ModuleCodegen<M> {
 #[derive(Debug, Encodable, Decodable)]
 pub struct CompiledModule {
     pub name: String,
-    pub kind: ModuleKind,
     pub object: Option<PathBuf>,
     pub dwarf_object: Option<PathBuf>,
     pub bytecode: Option<PathBuf>,
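
Note on patch 1: with the default allocator (no #[global_allocator]),
the compiler-generated allocator shim forwards the canonical __rust_*
entry points to libstd's default implementations, whose symbols carry
the __rdl_ prefix (and __rg_oom, used for the allocation-error
handler). Roughly, and only as an illustration (the real shim is
emitted directly as backend IR by codegen_allocator, not written in
Rust):

    // Hypothetical Rust rendering of one forwarded entry point.
    extern "Rust" {
        fn __rdl_alloc(size: usize, align: usize) -> *mut u8;
    }

    #[no_mangle]
    unsafe fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
        // Forward to libstd's default allocator implementation.
        __rdl_alloc(size, align)
    }

Without the export added in patch 1, (Thin)LTO would treat the __rdl_*
symbols as unused and internalize them, and the separately generated
shim could no longer resolve them at link time.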