@@ -13,6 +13,8 @@ use errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler
 #[cfg(not(parallel_compiler))]
 use rustc_data_structures::cold_path;
 use rustc_data_structures::fx::{FxHashMap, FxHasher};
+#[cfg(parallel_compiler)]
+use rustc_data_structures::profiling::TimingGuard;
 use rustc_data_structures::sharded::Sharded;
 use rustc_data_structures::sync::{Lock, Lrc};
 use rustc_data_structures::thin_vec::ThinVec;
@@ -82,6 +84,19 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// for some compile-time benchmarks.
     #[inline(always)]
     pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
+        // Handling the `query_blocked_prof_timer` is a bit weird because of the
+        // control flow in this function: Blocking is implemented by
+        // awaiting a running job and, once that is done, entering the loop below
+        // again from the top. In that second iteration we will hit the
+        // cache which provides us with the information we need for
+        // finishing the "query-blocked" event.
+        //
+        // We thus allocate `query_blocked_prof_timer` outside the loop,
+        // initialize it during the first iteration and finish it during the
+        // second iteration.
+        #[cfg(parallel_compiler)]
+        let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
+
         let cache = Q::query_cache(tcx);
         loop {
             // We compute the key's hash once and then use it for both the
@@ -95,7 +110,17 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             if let Some((_, value)) =
                 lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
             {
-                tcx.prof.query_cache_hit(value.index.into());
+                if unlikely!(tcx.prof.enabled()) {
+                    tcx.prof.query_cache_hit(value.index.into());
+
+                    #[cfg(parallel_compiler)]
+                    {
+                        if let Some(prof_timer) = query_blocked_prof_timer.take() {
+                            prof_timer.finish_with_query_invocation_id(value.index.into());
+                        }
+                    }
+                }
+
                 let result = (value.value.clone(), value.index);
                 #[cfg(debug_assertions)]
                 {
@@ -104,9 +129,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                 return TryGetJob::JobCompleted(result);
             }
 
-            #[cfg(parallel_compiler)]
-            let query_blocked_prof_timer;
-
             let job = match lock.active.entry((*key).clone()) {
                 Entry::Occupied(entry) => {
                     match *entry.get() {
@@ -116,7 +138,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                             // self-profiler.
                             #[cfg(parallel_compiler)]
                             {
-                                query_blocked_prof_timer = tcx.prof.query_blocked(Q::NAME);
+                                query_blocked_prof_timer = Some(tcx.prof.query_blocked());
                             }
 
                             job.clone()
@@ -153,11 +175,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             {
                 let result = job.r#await(tcx, span);
 
-                // This `drop()` is not strictly necessary as the binding
-                // would go out of scope anyway. But it's good to have an
-                // explicit marker of how far the measurement goes.
-                drop(query_blocked_prof_timer);
-
                 if let Err(cycle) = result {
                     return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
                 }