@@ -13,6 +13,8 @@ use errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler
 #[cfg(not(parallel_compiler))]
 use rustc_data_structures::cold_path;
 use rustc_data_structures::fx::{FxHashMap, FxHasher};
+#[cfg(parallel_compiler)]
+use rustc_data_structures::profiling::TimingGuard;
 use rustc_data_structures::sharded::Sharded;
 use rustc_data_structures::sync::{Lock, Lrc};
 use rustc_data_structures::thin_vec::ThinVec;
@@ -82,6 +84,19 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// for some compile-time benchmarks.
     #[inline(always)]
     pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
+        // Handling the `query_blocked_prof_timer` is a bit weird because of the
+        // control flow in this function: Blocking is implemented by
+        // awaiting a running job and, once that is done, entering the loop below
+        // again from the top. In that second iteration we will hit the
+        // cache which provides us with the information we need for
+        // finishing the "query-blocked" event.
+        //
+        // We thus allocate `query_blocked_prof_timer` outside the loop,
+        // initialize it during the first iteration and finish it during the
+        // second iteration.
+        #[cfg(parallel_compiler)]
+        let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
+
         let cache = Q::query_cache(tcx);
         loop {
             // We compute the key's hash once and then use it for both the
@@ -95,7 +110,17 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             if let Some((_, value)) =
                 lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
             {
-                tcx.prof.query_cache_hit(value.index.into());
+                if unlikely!(tcx.prof.enabled()) {
+                    tcx.prof.query_cache_hit(value.index.into());
+
+                    #[cfg(parallel_compiler)]
+                    {
+                        if let Some(prof_timer) = query_blocked_prof_timer.take() {
+                            prof_timer.finish_with_query_invocation_id(value.index.into());
+                        }
+                    }
+                }
+
                 let result = (value.value.clone(), value.index);
                 #[cfg(debug_assertions)]
                 {
@@ -104,9 +129,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                 return TryGetJob::JobCompleted(result);
             }
 
-            #[cfg(parallel_compiler)]
-            let query_blocked_prof_timer;
-
             let job = match lock.active.entry((*key).clone()) {
                 Entry::Occupied(entry) => {
                     match *entry.get() {
@@ -116,7 +138,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                             // self-profiler.
                             #[cfg(parallel_compiler)]
                             {
-                                query_blocked_prof_timer = tcx.prof.query_blocked(Q::NAME);
+                                query_blocked_prof_timer = Some(tcx.prof.query_blocked());
                             }
 
                             job.clone()
@@ -153,11 +175,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             {
                 let result = job.r#await(tcx, span);
 
-                // This `drop()` is not strictly necessary as the binding
-                // would go out of scope anyway. But it's good to have an
-                // explicit marker of how far the measurement goes.
-                drop(query_blocked_prof_timer);
-
                 if let Err(cycle) = result {
                     return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
                 }