@@ -656,30 +656,91 @@ impl Session {
656656 return n as usize
657657 }
658658
659+ // Why is 16 codegen units the default all the time?
660+ //
661+ // The main reason for enabling multiple codegen units by default is to
662+ // leverage the ability for the trans backend to do translation and
663+ // codegen in parallel. This allows us, especially for large crates, to
664+ // make good use of all available resources on the machine once we've
665+ // hit that stage of compilation. Large crates especially then often
666+ // take a long time in trans/codegen and this helps us amortize that
667+ // cost.
668+ //
669+ // Note that a high number here doesn't mean that we'll be spawning a
670+ // large number of threads in parallel. The backend of rustc contains
671+ // global rate limiting through the `jobserver` crate so we'll never
672+ // overload the system with too much work, but rather we'll only be
673+ // optimizing when we're otherwise cooperating with other instances of
674+ // rustc.
675+ //
676+ // Rather a high number here means that we should be able to keep a lot
677+ // of idle cpus busy. By ensuring that no codegen unit takes *too* long
678+ // to build we'll be guaranteed that all cpus will finish pretty closely
679+ // to one another and we should make relatively optimal use of system
680+ // resources.
681+ //
682+ // Note that the main cost of codegen units is that it prevents LLVM
683+ // from inlining across codegen units. Users in general don't have a lot
684+ // of control over how codegen units are split up so it's our job in the
685+ // compiler to ensure that undue performance isn't lost when using
686+ // codegen units (aka we can't require everyone to slap `#[inline]` on
687+ // everything).
688+ //
689+ // If we're compiling at `-O0` then the number doesn't really matter too
690+ // much because performance doesn't matter and inlining is ok to lose.
691+ // In debug mode we just want to try to guarantee that no cpu is stuck
692+ // doing work that could otherwise be farmed to others.
693+ //
694+ // In release mode, however (O1 and above) performance does indeed
695+ // matter! To recover the performance lost to missed inlining we'll be
696+ // enabling ThinLTO by default (the function for which is just below).
697+ // This will ensure that we recover any inlining wins we otherwise lost
698+ // through codegen unit partitioning.
699+ //
700+ // ---
701+ //
702+ // Ok that's a lot of words but the basic tl;dr is that we want a high
703+ // number here -- but not too high. Additionally we're "safe" to have it
704+ // always at the same number at all optimization levels.
705+ //
706+ // As a result 16 was chosen here! Mostly because it was a power of 2
707+ // and most benchmarks agreed it was roughly a local optimum. Not very
708+ // scientific.
659709 match self . opts . optimize {
660- // If we're compiling at `-O0` then default to 16 codegen units.
661- // The number here shouldn't matter too too much as debug mode
662- // builds don't rely on performance at all, meaning that lost
663- // opportunities for inlining through multiple codegen units is
664- // a non-issue.
665- //
666- // Note that the high number here doesn't mean that we'll be
667- // spawning a large number of threads in parallel. The backend
668- // of rustc contains global rate limiting through the
669- // `jobserver` crate so we'll never overload the system with too
670- // much work, but rather we'll only be optimizing when we're
671- // otherwise cooperating with other instances of rustc.
672- //
673- // Rather the high number here means that we should be able to
674- // keep a lot of idle cpus busy. By ensuring that no codegen
675- // unit takes *too* long to build we'll be guaranteed that all
676- // cpus will finish pretty closely to one another and we should
677- // make relatively optimal use of system resources
678710 config:: OptLevel :: No => 16 ,
711+ _ => 1 , // FIXME(#46346) this should be 16
712+ }
713+ }
679714
680- // All other optimization levels default use one codegen unit,
681- // the historical default in Rust for a Long Time.
682- _ => 1 ,
715+ /// Returns whether ThinLTO is enabled for this compilation
716+ pub fn thinlto ( & self ) -> bool {
717+ // If processing command line options determined that we're incompatible
718+ // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
719+ if let Some ( enabled) = self . opts . cli_forced_thinlto {
720+ return enabled
721+ }
722+
723+ // If explicitly specified, use that with the next highest priority
724+ if let Some ( enabled) = self . opts . debugging_opts . thinlto {
725+ return enabled
726+ }
727+
728+ // If there's only one codegen unit and LTO isn't enabled then there's
729+ // no need for ThinLTO so just return false.
730+ if self . codegen_units ( ) == 1 && !self . lto ( ) {
731+ return false
732+ }
733+
734+ // Right now ThinLTO isn't compatible with incremental compilation.
735+ if self . opts . incremental . is_some ( ) {
736+ return false
737+ }
738+
739+ // Now we're in "defaults" territory. By default we enable ThinLTO for
740+ // optimized compiles (anything greater than O0).
741+ match self . opts . optimize {
742+ config:: OptLevel :: No => false ,
743+ _ => true ,
683744 }
684745 }
685746}
0 commit comments