@@ -27,7 +27,7 @@ use rustc_session::config::{
 };
 use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
 use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use tracing::{debug, trace};

 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
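The `trace` import added above backs the `trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop)` call introduced in the next hunk. For readers less familiar with the `?field` shorthand, here is a minimal standalone sketch (assuming the `tracing` and `tracing-subscriber` crates; rustc installs its own subscriber, so the setup line is only needed outside the compiler):

use tracing::trace;

fn main() {
    // Outside rustc, a subscriber must be installed before events become visible;
    // inside rustc this is wired up through the RUSTC_LOG machinery instead.
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();

    let unroll_loops = false;
    let vectorize_slp = true;
    let vectorize_loop = true;
    // `?name` records a field called `name` using its `Debug` implementation, so the
    // emitted event carries unroll_loops=false vectorize_slp=true vectorize_loop=true.
    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, "llvm_optimize flags");
}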
@@ -537,9 +537,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler-based AD is to differentiate optimized IR instead of unoptimized
+    // source code. However, benchmarks show that optimizations which increase the code size
+    // tend to reduce AD performance. Therefore, deactivate them before AD, then differentiate the
+    // code, and finally re-optimize the module, now with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to optimize only the individual
+    // functions being differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with Enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one autodiff
+    // call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
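To make the effect of the new branch easy to see in isolation, here is a small self-contained sketch (the enum and function are illustrative stand-ins, not the compiler's `config::OptLevel` or `ModuleConfig`). Because `cfg!(llvm_enzyme)` is a compile-time constant, a rustc built without Enzyme support keeps exactly the previous behaviour, while an Enzyme-enabled build turns all three flags off whenever the caller requests it:

// Stand-in for config::OptLevel; only the size-related variants matter here.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
enum OptLevel { No, Less, Default, Aggressive, Size, SizeMin }

// Returns (unroll_loops, vectorize_slp, vectorize_loop).
fn select_flags(
    opt_level: OptLevel,
    skip_size_increasing_opts: bool,
    config_vectorize_slp: bool,
    config_vectorize_loop: bool,
) -> (bool, bool, bool) {
    // `cfg!(llvm_enzyme)` is false unless the crate was built with that cfg
    // (e.g. RUSTFLAGS="--cfg llvm_enzyme" for this sketch), so the branch folds
    // away in ordinary builds. Recent toolchains may warn about the unexpected cfg name.
    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
        // Postpone size-increasing transforms until after differentiation.
        (false, false, false)
    } else {
        // Previous behaviour: unrolling is off only for size-optimized builds,
        // vectorization follows the per-module config.
        let unroll = opt_level != OptLevel::Size && opt_level != OptLevel::SizeMin;
        (unroll, config_vectorize_slp, config_vectorize_loop)
    }
}

fn main() {
    let (unroll, slp, loops) = select_flags(OptLevel::Default, true, true, true);
    println!("unroll_loops={unroll} vectorize_slp={slp} vectorize_loop={loops}");
}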
@@ -603,8 +629,8 @@ pub(crate) unsafe fn llvm_optimize(
         using_thin_buffers,
         config.merge_functions,
         unroll_loops,
-        config.vectorize_slp,
-        config.vectorize_loop,
+        vectorize_slp,
+        vectorize_loop,
         config.no_builtins,
         config.emit_lifetime_markers,
         sanitizer_options.as_ref(),
@@ -648,14 +674,29 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }

+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
             Lto::Fat => llvm::OptStage::PreLinkFatLTO,
             Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
             _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
             _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling.
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
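As a usage note for reviewers trying this out: since the selected flags are now reported via `trace!`, running the built compiler with rustc's usual logging switch, for example `RUSTC_LOG=rustc_codegen_llvm::back::write=trace` (module path assumed from the file being patched), should show directly whether the Enzyme gate disabled unrolling and vectorization for a given module.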