@@ -20,6 +20,8 @@ use rustc_data_structures::sharded::Sharded;
 use rustc_data_structures::thin_vec::ThinVec;
 #[cfg(not(parallel_compiler))]
 use rustc_data_structures::cold_path;
+#[cfg(parallel_compiler)]
+use rustc_data_structures::profiling::TimingGuard;
 use std::hash::{Hash, Hasher};
 use std::mem;
 use std::ptr;
@@ -91,6 +93,19 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// for some compile-time benchmarks.
     #[inline(always)]
     pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
+        // Handling the `query_blocked_prof_timer` is a bit weird because of the
+        // control flow in this function: blocking is implemented by awaiting a
+        // running job and, once that is done, entering the loop below again
+        // from the top. In that second iteration we will hit the cache, which
+        // provides us with the information we need to finish the
+        // "query-blocked" event.
+        //
+        // We thus allocate `query_blocked_prof_timer` outside the loop,
+        // initialize it during the first iteration, and finish it during the
+        // second one.
+        #[cfg(parallel_compiler)]
+        let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
+
         let cache = Q::query_cache(tcx);
         loop {
             // We compute the key's hash once and then use it for both the
@@ -104,7 +119,17 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             if let Some((_, value)) =
                 lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
             {
-                tcx.prof.query_cache_hit(value.index.into());
+                if unlikely!(tcx.prof.enabled()) {
+                    tcx.prof.query_cache_hit(value.index.into());
+
+                    #[cfg(parallel_compiler)]
+                    {
+                        if let Some(prof_timer) = query_blocked_prof_timer.take() {
+                            prof_timer.finish_with_query_invocation_id(value.index.into());
+                        }
+                    }
+                }
+
                 let result = (value.value.clone(), value.index);
                 #[cfg(debug_assertions)]
                 {
@@ -113,9 +138,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                 return TryGetJob::JobCompleted(result);
             }
 
-            #[cfg(parallel_compiler)]
-            let query_blocked_prof_timer;
-
             let job = match lock.active.entry((*key).clone()) {
                 Entry::Occupied(entry) => {
                     match *entry.get() {
@@ -125,7 +147,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                             // self-profiler.
                             #[cfg(parallel_compiler)]
                             {
-                                query_blocked_prof_timer = tcx.prof.query_blocked(Q::NAME);
+                                query_blocked_prof_timer = Some(tcx.prof.query_blocked());
                             }
 
                             job.clone()
@@ -169,11 +191,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             {
                 let result = job.r#await(tcx, span);
 
-                // This `drop()` is not strictly necessary as the binding
-                // would go out of scope anyway. But it's good to have an
-                // explicit marker of how far the measurement goes.
-                drop(query_blocked_prof_timer);
-
                 if let Err(cycle) = result {
                     return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
                 }
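
For readers unfamiliar with the pattern this change introduces, the following is a minimal, self-contained sketch of it: an optional timing guard allocated before the retry loop, filled in on the iteration that blocks, and consumed on the later iteration that hits the cache. The `Profiler`, `Guard`, `finish_with_id`, and `try_get_sketch` names are hypothetical stand-ins for illustration only, not rustc's actual profiling API.

// Standalone sketch (not rustc code) of a timing guard stored in an `Option`
// outside a retry loop, started on the iteration that blocks and finished on
// the later iteration that hits the cache.
use std::time::Instant;

struct Profiler;

struct Guard {
    start: Instant,
}

impl Profiler {
    // Stand-in for starting a "query blocked" timing event.
    fn query_blocked(&self) -> Guard {
        Guard { start: Instant::now() }
    }
}

impl Guard {
    // Stand-in for `finish_with_query_invocation_id`: the event can only be
    // recorded once the cache hit tells us which query invocation it was.
    fn finish_with_id(self, id: u64) {
        println!("query invocation {} unblocked after {:?}", id, self.start.elapsed());
    }
}

fn try_get_sketch(prof: &Profiler, mut cache_lookup: impl FnMut() -> Option<u64>) {
    // Allocated outside the loop so the guard survives from the iteration
    // that blocks into the iteration that finds the cached result.
    let mut query_blocked_prof_timer: Option<Guard> = None;

    loop {
        if let Some(id) = cache_lookup() {
            // Second iteration: finish the "query-blocked" event now that
            // the cache hit supplies the invocation id.
            if let Some(timer) = query_blocked_prof_timer.take() {
                timer.finish_with_id(id);
            }
            return;
        }

        // First iteration: another thread owns the query; start timing and
        // retry from the top (the wait on the other job is elided here).
        query_blocked_prof_timer = Some(prof.query_blocked());
    }
}

fn main() {
    let mut calls = 0;
    try_get_sketch(&Profiler, move || {
        calls += 1;
        // Miss on the first lookup, hit on the second, mimicking the
        // blocked-then-retried control flow in `try_get`.
        if calls > 1 { Some(42) } else { None }
    });
}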