@@ -27,8 +27,9 @@ use rustc_session::config::{
 };
 use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
 use rustc_target::spec::{CodeModel, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use tracing::{debug, trace};

+//use crate::back::autodiff::*;
 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
 use crate::back::profiling::{
@@ -529,9 +530,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler-based AD is to differentiate optimized IR instead of
+    // unoptimized source code. However, benchmarks show that optimizations which increase the
+    // code size tend to reduce AD performance. Therefore, deactivate them before AD, then
+    // differentiate the code, and finally re-optimize the module with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to only optimize the individual
+    // functions getting differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with Enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one
+    // autodiff call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
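The heart of this hunk is the branch that decides whether size-increasing passes run at all. As a rough illustration of that decision in isolation, here is a minimal, self-contained sketch. It is not rustc code: `OptLevel`, `PipelineConfig`, and `size_increasing_opts` are hypothetical stand-ins for the real `config::OptLevel`, `ModuleConfig`, and the inline logic in `llvm_optimize`, and the `llvm_enzyme` cfg is assumed to be passed by the build (e.g. `--cfg llvm_enzyme`).

// Standalone sketch, not rustc code: hypothetical stand-ins for the types used in the diff.
#[derive(PartialEq)]
enum OptLevel {
    Default,
    Size,
    SizeMin,
}

struct PipelineConfig {
    vectorize_slp: bool,
    vectorize_loop: bool,
}

// Returns (unroll_loops, vectorize_slp, vectorize_loop), mirroring the branch above.
fn size_increasing_opts(
    config: &PipelineConfig,
    opt_level: OptLevel,
    skip_size_increasing_opts: bool,
) -> (bool, bool, bool) {
    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
        // Postpone size-increasing passes until after differentiation.
        (false, false, false)
    } else {
        // Otherwise fall back to the usual per-opt-level defaults.
        let unroll_loops = opt_level != OptLevel::Size && opt_level != OptLevel::SizeMin;
        (unroll_loops, config.vectorize_slp, config.vectorize_loop)
    }
}

fn main() {
    let config = PipelineConfig { vectorize_slp: true, vectorize_loop: true };
    let (unroll, slp, vec_loop) = size_increasing_opts(&config, OptLevel::Default, true);
    println!("unroll_loops={unroll} vectorize_slp={slp} vectorize_loop={vec_loop}");
}

In the actual pipeline the three booleans are handed straight to the LLVM pass-builder call, which is exactly what the next hunk adjusts.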
@@ -595,8 +622,8 @@ pub(crate) unsafe fn llvm_optimize(
         using_thin_buffers,
         config.merge_functions,
         unroll_loops,
-        config.vectorize_slp,
-        config.vectorize_loop,
+        vectorize_slp,
+        vectorize_loop,
         config.no_builtins,
         config.emit_lifetime_markers,
         sanitizer_options.as_ref(),
@@ -640,14 +667,29 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }

+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
             Lto::Fat => llvm::OptStage::PreLinkFatLTO,
             Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
             _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
             _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
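For reference, the new `trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop)` line uses the tracing crate's `?field` shorthand, which records each variable as an event field via its `Debug` implementation; inside rustc the output is gated by the `RUSTC_LOG` environment variable. Below is a minimal sketch of the same call outside rustc, assuming `tracing` and `tracing-subscriber` as dependencies; the subscriber setup is only there so the event is actually printed.

use tracing::trace;

fn main() {
    // Print events at TRACE level and above (standalone example only; rustc wires up
    // its own subscriber and filters via RUSTC_LOG).
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();

    let unroll_loops = false;
    let vectorize_slp = false;
    let vectorize_loop = false;

    // `?name` records the variable as a field using its Debug impl, rendering roughly as
    // "unroll_loops=false vectorize_slp=false vectorize_loop=false".
    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
}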