@@ -477,8 +477,8 @@ fn remove_cycle(
477477/// Detects query cycles by using depth first search over all active query jobs.
478478/// If a query cycle is found it will break the cycle by finding an edge which
479479/// uses a query latch and then resuming that waiter.
480- /// There may be multiple cycles involved in a deadlock, so this searches
481- /// all active queries for cycles before finally resuming all the waiters at once.
480+ /// There may be multiple cycles involved in a deadlock, but we only search for
481+ /// one cycle per call and resume one waiter at a time. See the `FIXME` below.
482482pub fn break_query_cycles ( query_map : QueryMap , registry : & rayon_core:: Registry ) {
483483 let mut wakelist = Vec :: new ( ) ;
484484 let mut jobs: Vec < QueryJobId > = query_map. keys ( ) . cloned ( ) . collect ( ) ;
@@ -488,6 +488,18 @@ pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry)
488488 while jobs. len ( ) > 0 {
489489 if remove_cycle ( & query_map, & mut jobs, & mut wakelist) {
490490 found_cycle = true ;
491+
492+ // FIXME(#137731): We can encounter deadlocks for cycles we can't break here,
493+ // but it's still unclear whether it's due to possible issues in rustc-rayon
494+ // or instead in the handling of query cycles. We can avoid them by only waking
495+ // up a single waiter instead of all of them. The deadlock issues seem to only
496+ // appear when multiple query cycle errors are involved, so this reduction in
497+ // parallelism, while suboptimal, is not universal and only the deadlock handler
498+ // will encounter these cases. The workaround shows loss of potential gains,
499+ // but there still are big improvements in the common case, and no regressions
500+ // compared to the single-threaded case. More investigation is still needed,
501+ // and once fixed, we can wake all the waiters up.
502+ break ;
491503 }
492504 }
493505
@@ -506,7 +518,7 @@ pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry)
506518 ) ;
507519 }
508520
509- // FIXME: Ensure this won't cause a deadlock before we return
521+ // FIXME: Ensure this won't cause a deadlock if we resume all waiters at once.
510522 for waiter in wakelist. into_iter ( ) {
511523 waiter. notify ( registry) ;
512524 }
0 commit comments