@@ -43,7 +43,7 @@ use crate::hir;
4343use rustc_data_structures:: bit_set:: GrowableBitSet ;
4444use rustc_data_structures:: sync:: Lock ;
4545use rustc_target:: spec:: abi:: Abi ;
46- use std:: cell:: Cell ;
46+ use std:: cell:: { Cell , RefCell } ;
4747use std:: cmp;
4848use std:: fmt:: { self , Display } ;
4949use std:: iter;
@@ -191,7 +191,6 @@ struct TraitObligationStack<'prev, 'tcx: 'prev> {
191191
192192 /// Depth-first number of this node in the search graph -- a
193193 /// pre-order index. Basically a freshly incremented counter.
194- #[ allow( dead_code) ] // TODO
195194 dfn : usize ,
196195}
197196
@@ -880,6 +879,12 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> {
880879 return Ok ( result) ;
881880 }
882881
882+ if let Some ( result) = stack. cache ( ) . get_provisional ( fresh_trait_ref) {
883+ debug ! ( "PROVISIONAL CACHE HIT: EVAL({:?})={:?}" , fresh_trait_ref, result) ;
884+ stack. update_reached_depth ( stack. cache ( ) . current_reached_depth ( ) ) ;
885+ return Ok ( result) ;
886+ }
887+
883888 // Check if this is a match for something already on the
884889 // stack. If so, we don't want to insert the result into the
885890 // main cache (it is cycle dependent) nor the provisional
@@ -892,20 +897,42 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> {
892897 let ( result, dep_node) = self . in_task ( |this| this. evaluate_stack ( & stack) ) ;
893898 let result = result?;
894899
900+ if !result. must_apply_modulo_regions ( ) {
901+ stack. cache ( ) . on_failure ( stack. dfn ) ;
902+ }
903+
895904 let reached_depth = stack. reached_depth . get ( ) ;
896905 if reached_depth >= stack. depth {
897906 debug ! ( "CACHE MISS: EVAL({:?})={:?}" , fresh_trait_ref, result) ;
898907 self . insert_evaluation_cache ( obligation. param_env , fresh_trait_ref, dep_node, result) ;
908+
909+ stack. cache ( ) . on_completion ( stack. depth , |fresh_trait_ref, provisional_result| {
910+ self . insert_evaluation_cache (
911+ obligation. param_env ,
912+ fresh_trait_ref,
913+ dep_node,
914+ provisional_result. max ( result) ,
915+ ) ;
916+ } ) ;
899917 } else {
918+ debug ! ( "PROVISIONAL: {:?}={:?}" , fresh_trait_ref, result) ;
900919 debug ! (
901- "evaluate_trait_predicate_recursively: skipping cache because {:?} \
920+ "evaluate_trait_predicate_recursively: caching provisionally because {:?} \
902921 is a cycle participant (at depth {}, reached depth {})",
903922 fresh_trait_ref,
904923 stack. depth,
905924 reached_depth,
906925 ) ;
926+
927+ stack. cache ( ) . insert_provisional (
928+ stack. dfn ,
929+ reached_depth,
930+ fresh_trait_ref,
931+ result,
932+ ) ;
907933 }
908934
935+
909936 Ok ( result)
910937 }
911938
@@ -4004,18 +4031,179 @@ impl<'o, 'tcx> TraitObligationStack<'o, 'tcx> {
40044031 }
40054032}
40064033
4034+ /// The "provisional evaluation cache" is used to store intermediate cache results
4035+ /// when solving auto traits. Auto traits are unusual in that they can support
4036+ /// cycles. So, for example, a "proof tree" like this would be ok:
4037+ ///
4038+ /// - `Foo<T>: Send` :-
4039+ /// - `Bar<T>: Send` :-
4040+ /// - `Foo<T>: Send` -- cycle, but ok
4041+ /// - `Baz<T>: Send`
4042+ ///
4043+ /// Here, to prove `Foo<T>: Send`, we have to prove `Bar<T>: Send` and
4044+ /// `Baz<T>: Send`. Proving `Bar<T>: Send` in turn required `Foo<T>: Send`.
4045+ /// For non-auto traits, this cycle would be an error, but for auto traits (because
4046+ /// they are coinductive) it is considered ok.
4047+ ///
4048+ /// However, there is a complication: at the point where we have
4049+ /// "proven" `Bar<T>: Send`, we have in fact only proven it
4050+ /// *provisionally*. In particular, we proved that `Bar<T>: Send`
4051+ /// *under the assumption* that `Foo<T>: Send`. But what if we later
4052+ /// find out this assumption is wrong? Specifically, we could
4053+ /// encounter some kind of error proving `Baz<T>: Send`. In that case,
4054+ /// `Bar<T>: Send` didn't turn out to be true.
4055+ ///
4056+ /// In Issue #60010, we found a bug in rustc where it would cache
4057+ /// these intermediate results. This was fixed in #60444 by disabling
4058+ /// *all* caching for things involved in a cycle -- in our example,
4059+ /// that would mean we don't cache that `Bar<T>: Send`. But this led
4060+ /// to large slowdowns.
4061+ ///
4062+ /// Specifically, imagine this scenario, where proving `Baz<T>: Send`
4063+ /// first requires proving `Bar<T>: Send` (which is true:
4064+ ///
4065+ /// - `Foo<T>: Send` :-
4066+ /// - `Bar<T>: Send` :-
4067+ /// - `Foo<T>: Send` -- cycle, but ok
4068+ /// - `Baz<T>: Send`
4069+ /// - `Bar<T>: Send` -- would be nice for this to be a cache hit!
4070+ /// - `*const T: Send` -- but what if we later encounter an error?
4071+ ///
4072+ /// The *provisional evaluation cache* resolves this issue. It stores
4073+ /// cache results that we've proven but which were involved in a cycle
4074+ /// in some way. We track the minimal stack depth (i.e., the
4075+ /// farthest from the top of the stack) that we are dependent on.
4076+ /// The idea is that the cache results within are all valid -- so long as
4077+ /// none of the nodes in between the current node and the node at that minimum
4078+ /// depth result in an error (in which case the cached results are just thrown away).
4079+ ///
4080+ /// During evaluation, we consult this provisional cache and rely on
4081+ /// it. Accessing a cached value is considered equivalent to accessing
4082+ /// a result at `reached_depth`, so it marks the *current* solution as
4083+ /// provisional as well. If an error is encountered, we toss out any
4084+ /// provisional results added from the subtree that encountered the
4085+ /// error. When we pop the node at `reached_depth` from the stack, we
4086+ /// can commit all the things that remain in the provisional cache.
40074087#[ derive( Default ) ]
40084088struct ProvisionalEvaluationCache < ' tcx > {
4089+ /// next "depth first number" to issue -- just a counter
40094090 dfn : Cell < usize > ,
4010- _dummy : Vec < & ' tcx ( ) > ,
4091+
4092+ /// Stores the "coldest" depth (bottom of stack) reached by any of
4093+ /// the evaluation entries. The idea here is that all things in the provisional
4094+ /// cache are always dependent on *something* that is colder in the stack:
4095+ /// therefore, if we add a new entry that is dependent on something *colder still*,
4096+ /// we have to modify the depth for all entries at once.
4097+ ///
4098+ /// Example:
4099+ ///
4100+ /// Imagine we have a stack `A B C D E` (with `E` being the top of
4101+ /// the stack). We cache something with depth 2, which means that
4102+ /// it was dependent on C. Then we pop E but go on and process a
4103+ /// new node F: A B C D F. Now F adds something to the cache with
4104+ /// depth 1, meaning it is dependent on B. Our original cache
4105+ /// entry is also dependent on B, because there is a path from E
4106+ /// to C and then from C to F and from F to B.
4107+ reached_depth : Cell < usize > ,
4108+
4109+ /// Map from cache key to the provisionally evaluated thing.
4110+ /// The cache entries contain the result but also the DFN in which they
4111+ /// were added. The DFN is used to clear out values on failure.
4112+ ///
4113+ /// Imagine we have a stack like:
4114+ ///
4115+ /// - `A B C` and we add a cache for the result of C (DFN 2)
4116+ /// - Then we have a stack `A B D` where `D` has DFN 3
4117+ /// - We try to solve D by evaluating E: `A B D E` (DFN 4)
4118+ /// - `E` generates various cache entries which have cyclic dependices on `B`
4119+ /// - `A B D E F` and so forth
4120+ /// - the DFN of `F` for example would be 5
4121+ /// - then we determine that `E` is in error -- we will then clear
4122+ /// all cache values whose DFN is >= 4 -- in this case, that
4123+ /// means the cached value for `F`.
4124+ map : RefCell < FxHashMap < ty:: PolyTraitRef < ' tcx > , ProvisionalEvaluation > > ,
4125+ }
4126+
4127+ /// A cache value for the provisional cache: contains the depth-first
4128+ /// number (DFN) and result.
4129+ #[ derive( Copy , Clone ) ]
4130+ struct ProvisionalEvaluation {
4131+ from_dfn : usize ,
4132+ result : EvaluationResult ,
40114133}
40124134
40134135impl < ' tcx > ProvisionalEvaluationCache < ' tcx > {
4136+ /// Get the next DFN in sequence (basically a counter).
40144137 fn next_dfn ( & self ) -> usize {
40154138 let result = self . dfn . get ( ) ;
40164139 self . dfn . set ( result + 1 ) ;
40174140 result
40184141 }
4142+
4143+ /// Check the provisional cache for any result for
4144+ /// `fresh_trait_ref`. If there is a hit, then you must consider
4145+ /// it an access to the stack slots at depth
4146+ /// `self.current_reached_depth()` and above.
4147+ fn get_provisional ( & self , fresh_trait_ref : ty:: PolyTraitRef < ' tcx > ) -> Option < EvaluationResult > {
4148+ Some ( self . map . borrow ( ) . get ( & fresh_trait_ref) ?. result )
4149+ }
4150+
4151+ /// Current value of the `reached_depth` counter -- all the
4152+ /// provisional cache entries are dependent on the item at this
4153+ /// depth.
4154+ fn current_reached_depth ( & self ) -> usize {
4155+ self . reached_depth . get ( )
4156+ }
4157+
4158+ /// Insert a provisional result into the cache. The result came
4159+ /// from the node with the given DFN. It accessed a minimum depth
4160+ /// of `reached_depth` to compute. It evaluated `fresh_trait_ref`
4161+ /// and resulted in `result`.
4162+ fn insert_provisional (
4163+ & self ,
4164+ from_dfn : usize ,
4165+ reached_depth : usize ,
4166+ fresh_trait_ref : ty:: PolyTraitRef < ' tcx > ,
4167+ result : EvaluationResult ,
4168+ ) {
4169+ let r_d = self . reached_depth . get ( ) ;
4170+ self . reached_depth . set ( r_d. min ( reached_depth) ) ;
4171+
4172+ self . map . borrow_mut ( ) . insert ( fresh_trait_ref, ProvisionalEvaluation { from_dfn, result } ) ;
4173+ }
4174+
4175+ /// Invoked when the node with dfn `dfn` does not get a successful
4176+ /// result. This will clear out any provisional cache entries
4177+ /// that were added since `dfn` was created. This is because the
4178+ /// provisional entries are things which must assume that the
4179+ /// things on the stack at the time of their creation succeeded --
4180+ /// since the failing node is presently at the top of the stack,
4181+ /// these provisional entries must either depend on it or some
4182+ /// ancestor of it.
4183+ fn on_failure ( & self , dfn : usize ) {
4184+ self . map . borrow_mut ( ) . retain ( |_key, eval| eval. from_dfn >= dfn)
4185+ }
4186+
4187+ /// Invoked when the node at depth `depth` completed without
4188+ /// depending on anything higher in the stack (if that completion
4189+ /// was a failure, then `on_failure` should have been invoked
4190+ /// already). The callback `op` will be invoked for each
4191+ /// provisional entry that we can now confirm.
4192+ fn on_completion (
4193+ & self ,
4194+ depth : usize ,
4195+ mut op : impl FnMut ( ty:: PolyTraitRef < ' tcx > , EvaluationResult ) ,
4196+ ) {
4197+ if self . reached_depth . get ( ) < depth {
4198+ return ;
4199+ }
4200+
4201+ for ( fresh_trait_ref, eval) in self . map . borrow_mut ( ) . drain ( ) {
4202+ op ( fresh_trait_ref, eval. result ) ;
4203+ }
4204+
4205+ self . reached_depth . set ( depth) ;
4206+ }
40194207}
40204208
40214209#[ derive( Copy , Clone ) ]
0 commit comments