@@ -20,7 +20,7 @@ use tracing::debug;
2020
2121use crate :: dep_graph:: { DepNode , WorkProduct , WorkProductId } ;
2222use crate :: middle:: codegen_fn_attrs:: CodegenFnAttrFlags ;
23- use crate :: ty:: { GenericArgs , Instance , InstanceKind , SymbolName , TyCtxt } ;
23+ use crate :: ty:: { self , GenericArgs , Instance , InstanceKind , SymbolName , Ty , TyCtxt } ;
2424
2525/// Describes how a monomorphization will be instantiated in object files.
2626#[ derive( PartialEq ) ]
@@ -54,6 +54,39 @@ pub enum MonoItem<'tcx> {
5454 GlobalAsm ( ItemId ) ,
5555}
5656
57+ fn opt_incr_drop_glue_mode < ' tcx > ( tcx : TyCtxt < ' tcx > , ty : Ty < ' tcx > ) -> InstantiationMode {
58+ // Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
59+ // dropping.
60+ let ty:: Adt ( adt_def, _) = ty. kind ( ) else {
61+ return InstantiationMode :: LocalCopy ;
62+ } ;
63+
64+ // Types that don't have a direct Drop impl, but have fields that require dropping.
65+ let Some ( dtor) = adt_def. destructor ( tcx) else {
66+ // We use LocalCopy for drops of enums only; this code is inherited from
67+ // https://github.com/rust-lang/rust/pull/67332 and the theory is that we get to optimize
68+ // out code like drop_in_place(Option::None) before crate-local ThinLTO, which improves
69+ // compile time. At the time of writing, simply removing this entire check does seem to
70+ // regress incr-opt compile times. But it sure seems like a more sophisticated check could
71+ // do better here.
72+ if adt_def. is_enum ( ) {
73+ return InstantiationMode :: LocalCopy ;
74+ } else {
75+ return InstantiationMode :: GloballyShared { may_conflict : true } ;
76+ }
77+ } ;
78+
79+ // We've gotten to a drop_in_place for a type that directly implements Drop.
80+ // The drop glue is a wrapper for the Drop::drop impl, and we are an optimized build, so in an
81+ // effort to coordinate with the mode that the actual impl will get, we make the glue also
82+ // LocalCopy.
83+ if tcx. cross_crate_inlinable ( dtor. did ) {
84+ InstantiationMode :: LocalCopy
85+ } else {
86+ InstantiationMode :: GloballyShared { may_conflict : true }
87+ }
88+ }
89+
5790impl < ' tcx > MonoItem < ' tcx > {
5891 /// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
5992 pub fn is_user_defined ( & self ) -> bool {
@@ -123,16 +156,36 @@ impl<'tcx> MonoItem<'tcx> {
123156 return InstantiationMode :: GloballyShared { may_conflict : false } ;
124157 }
125158
126- // FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
127- // across 4 functions:
128- // * cross_crate_inlinable(def_id)
129- // * InstanceKind::requires_inline
130- // * InstanceKind::generate_cgu_internal_copy
131- // * MonoItem::instantiation_mode
132- // Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
133- // which symbols this crate exports, we are obligated to only generate LocalCopy when
134- // generates_cgu_internal_copy returns true.
135- if !instance. def . generates_cgu_internal_copy ( tcx) {
159+ // This is technically a heuristic even though it's in the "not a heuristic" part of
160+ // instantiation mode selection.
161+ // It is surely possible to untangle this; the root problem is that the way we instantiate
162+ // InstanceKind other than Item is very complicated.
163+ //
164+ // The fallback case is to give everything else GloballyShared at OptLevel::No and
165+ // LocalCopy at all other opt levels. This is a good default, except for one specific build
166+ // configuration: Optimized incremental builds.
167+ // In the current compiler architecture there is a fundamental tension between
168+ // optimizations (which want big CGUs with as many things LocalCopy as possible) and
169+ // incrementality (which wants small CGUs with as many things GloballyShared as possible).
170+ // The heuristics implemented here do better than a completely naive approach in the
171+ // compiler benchmark suite, but there is no reason to believe they are optimal.
172+ if let InstanceKind :: DropGlue ( _, Some ( ty) ) = instance. def {
173+ if tcx. sess . opts . optimize == OptLevel :: No {
174+ return InstantiationMode :: GloballyShared { may_conflict : false } ;
175+ }
176+ if tcx. sess . opts . incremental . is_none ( ) {
177+ return InstantiationMode :: LocalCopy ;
178+ }
179+ return opt_incr_drop_glue_mode ( tcx, ty) ;
180+ }
181+
182+ // We need to ensure that we do not decide the InstantiationMode of an exported symbol is
183+ // LocalCopy. Since exported symbols are computed based on the output of
184+ // cross_crate_inlinable, we are beholden to our previous decisions.
185+ //
186+ // Note that just like above, this check for requires_inline is technically a heuristic
187+ // even though it's in the "not a heuristic" part of instantiation mode selection.
188+ if !tcx. cross_crate_inlinable ( instance. def_id ( ) ) && !instance. def . requires_inline ( tcx) {
136189 return InstantiationMode :: GloballyShared { may_conflict : false } ;
137190 }
138191
0 commit comments