11use std:: assert_matches:: assert_matches;
2+ use std:: collections:: hash_map:: Entry ;
23use std:: fmt:: Debug ;
34use std:: hash:: Hash ;
45use std:: marker:: PhantomData ;
@@ -7,14 +8,15 @@ use std::sync::atomic::{AtomicU32, Ordering};
78
89use rustc_data_structures:: fingerprint:: Fingerprint ;
910use rustc_data_structures:: fx:: { FxHashMap , FxHashSet } ;
10- use rustc_data_structures:: profiling:: { QueryInvocationId , SelfProfilerRef } ;
11- use rustc_data_structures:: sharded:: { self , ShardedHashMap } ;
11+ use rustc_data_structures:: profiling:: QueryInvocationId ;
12+ use rustc_data_structures:: sharded:: { self , Sharded } ;
1213use rustc_data_structures:: stable_hasher:: { HashStable , StableHasher } ;
1314use rustc_data_structures:: sync:: { AtomicU64 , Lock } ;
1415use rustc_data_structures:: unord:: UnordMap ;
1516use rustc_index:: IndexVec ;
1617use rustc_macros:: { Decodable , Encodable } ;
1718use rustc_serialize:: opaque:: { FileEncodeResult , FileEncoder } ;
19+ use rustc_session:: Session ;
1820use tracing:: { debug, instrument} ;
1921#[ cfg( debug_assertions) ]
2022use { super :: debug:: EdgeFilter , std:: env} ;
@@ -118,7 +120,7 @@ where
118120
119121impl < D : Deps > DepGraph < D > {
120122 pub fn new (
121- profiler : & SelfProfilerRef ,
123+ session : & Session ,
122124 prev_graph : Arc < SerializedDepGraph > ,
123125 prev_work_products : WorkProductMap ,
124126 encoder : FileEncoder ,
@@ -128,7 +130,7 @@ impl<D: Deps> DepGraph<D> {
128130 let prev_graph_node_count = prev_graph. node_count ( ) ;
129131
130132 let current = CurrentDepGraph :: new (
131- profiler ,
133+ session ,
132134 prev_graph_node_count,
133135 encoder,
134136 record_graph,
@@ -353,12 +355,13 @@ impl<D: Deps> DepGraphData<D> {
353355 // in `DepGraph::try_mark_green()`.
354356 // 2. Two distinct query keys get mapped to the same `DepNode`
355357 // (see for example #48923).
356- assert ! (
357- ! self . dep_node_exists ( & key ) ,
358- "forcing query with already existing `DepNode`\n \
358+ self . assert_dep_node_not_yet_allocated_in_current_session ( & key , || {
359+ format ! (
360+ "forcing query with already existing `DepNode`\n \
359361 - query-key: {arg:?}\n \
360362 - dep-node: {key:?}"
361- ) ;
363+ )
364+ } ) ;
362365
363366 let with_deps = |task_deps| D :: with_deps ( task_deps, || task ( cx, arg) ) ;
364367 let ( result, edges) = if cx. dep_context ( ) . is_eval_always ( key. kind ) {
@@ -438,7 +441,31 @@ impl<D: Deps> DepGraphData<D> {
438441 hash : self . current . anon_id_seed . combine ( hasher. finish ( ) ) . into ( ) ,
439442 } ;
440443
441- self . current . intern_new_node ( target_dep_node, task_deps, Fingerprint :: ZERO )
444+ // The DepNodes generated by the process above are not unique. 2 queries could
445+ // have exactly the same dependencies. However, deserialization does not handle
446+ // duplicated nodes, so we do the deduplication here directly.
447+ //
448+ // As anonymous nodes are a small quantity compared to the full dep-graph, the
449+ // memory impact of this `anon_node_to_index` map remains tolerable, and helps
450+ // us avoid useless growth of the graph with almost-equivalent nodes.
451+ match self
452+ . current
453+ . anon_node_to_index
454+ . get_shard_by_value ( & target_dep_node)
455+ . lock ( )
456+ . entry ( target_dep_node)
457+ {
458+ Entry :: Occupied ( entry) => * entry. get ( ) ,
459+ Entry :: Vacant ( entry) => {
460+ let dep_node_index = self . current . intern_new_node (
461+ target_dep_node,
462+ task_deps,
463+ Fingerprint :: ZERO ,
464+ ) ;
465+ entry. insert ( dep_node_index) ;
466+ dep_node_index
467+ }
468+ }
442469 }
443470 } ;
444471
@@ -613,20 +640,22 @@ impl<D: Deps> DepGraph<D> {
613640}
614641
615642impl < D : Deps > DepGraphData < D > {
616- #[ inline]
617- fn dep_node_index_of_opt ( & self , dep_node : & DepNode ) -> Option < DepNodeIndex > {
643+ fn assert_dep_node_not_yet_allocated_in_current_session < S : std:: fmt:: Display > (
644+ & self ,
645+ dep_node : & DepNode ,
646+ msg : impl FnOnce ( ) -> S ,
647+ ) {
618648 if let Some ( prev_index) = self . previous . node_to_index_opt ( dep_node) {
619- self . current . prev_index_to_index . lock ( ) [ prev_index]
620- } else {
621- self . current . new_node_to_index . get ( dep_node)
649+ let current = self . current . prev_index_to_index . lock ( ) [ prev_index] ;
650+ assert ! ( current. is_none( ) , "{}" , msg( ) )
651+ } else if let Some ( nodes_newly_allocated_in_current_session) =
652+ & self . current . nodes_newly_allocated_in_current_session
653+ {
654+ let seen = nodes_newly_allocated_in_current_session. lock ( ) . contains ( dep_node) ;
655+ assert ! ( !seen, "{}" , msg( ) ) ;
622656 }
623657 }
624658
625- #[ inline]
626- fn dep_node_exists ( & self , dep_node : & DepNode ) -> bool {
627- self . dep_node_index_of_opt ( dep_node) . is_some ( )
628- }
629-
630659 fn node_color ( & self , dep_node : & DepNode ) -> Option < DepNodeColor > {
631660 if let Some ( prev_index) = self . previous . node_to_index_opt ( dep_node) {
632661 self . colors . get ( prev_index)
@@ -659,11 +688,6 @@ impl<D: Deps> DepGraphData<D> {
659688}
660689
661690impl < D : Deps > DepGraph < D > {
662- #[ inline]
663- pub fn dep_node_exists ( & self , dep_node : & DepNode ) -> bool {
664- self . data . as_ref ( ) . is_some_and ( |data| data. dep_node_exists ( dep_node) )
665- }
666-
667691 /// Checks whether a previous work product exists for `v` and, if
668692 /// so, return the path that leads to it. Used to skip doing work.
669693 pub fn previous_work_product ( & self , v : & WorkProductId ) -> Option < WorkProduct > {
@@ -926,6 +950,16 @@ impl<D: Deps> DepGraph<D> {
926950 self . node_color ( dep_node) . is_some_and ( |c| c. is_green ( ) )
927951 }
928952
953+ pub fn assert_dep_node_not_yet_allocated_in_current_session < S : std:: fmt:: Display > (
954+ & self ,
955+ dep_node : & DepNode ,
956+ msg : impl FnOnce ( ) -> S ,
957+ ) {
958+ if let Some ( data) = & self . data {
959+ data. assert_dep_node_not_yet_allocated_in_current_session ( dep_node, msg)
960+ }
961+ }
962+
929963 /// This method loads all on-disk cacheable query results into memory, so
930964 /// they can be written out to the new cache file again. Most query results
931965 /// will already be in memory but in the case where we marked something as
@@ -1031,24 +1065,24 @@ rustc_index::newtype_index! {
10311065/// largest in the compiler.
10321066///
10331067/// For this reason, we avoid storing `DepNode`s more than once as map
1034- /// keys. The `new_node_to_index ` map only contains nodes not in the previous
1068+ /// keys. The `anon_node_to_index ` map only contains nodes of anonymous queries not in the previous
10351069/// graph, and we map nodes in the previous graph to indices via a two-step
10361070/// mapping. `SerializedDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`,
10371071/// and the `prev_index_to_index` vector (which is more compact and faster than
10381072/// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`.
10391073///
1040- /// This struct uses three locks internally. The `data`, `new_node_to_index `,
1074+ /// This struct uses three locks internally. The `data`, `anon_node_to_index `,
10411075/// and `prev_index_to_index` fields are locked separately. Operations that take
10421076/// a `DepNodeIndex` typically just access the `data` field.
10431077///
10441078/// We only need to manipulate at most two locks simultaneously:
1045- /// `new_node_to_index ` and `data`, or `prev_index_to_index` and `data`. When
1046- /// manipulating both, we acquire `new_node_to_index ` or `prev_index_to_index`
1079+ /// `anon_node_to_index ` and `data`, or `prev_index_to_index` and `data`. When
1080+ /// manipulating both, we acquire `anon_node_to_index ` or `prev_index_to_index`
10471081/// first, and `data` second.
10481082pub ( super ) struct CurrentDepGraph < D : Deps > {
10491083 encoder : GraphEncoder < D > ,
1050- new_node_to_index : ShardedHashMap < DepNode , DepNodeIndex > ,
10511084 prev_index_to_index : Lock < IndexVec < SerializedDepNodeIndex , Option < DepNodeIndex > > > ,
1085+ anon_node_to_index : Sharded < FxHashMap < DepNode , DepNodeIndex > > ,
10521086
10531087 /// This is used to verify that fingerprints do not change between the creation of a node
10541088 /// and its recomputation.
@@ -1060,6 +1094,13 @@ pub(super) struct CurrentDepGraph<D: Deps> {
10601094 #[ cfg( debug_assertions) ]
10611095 forbidden_edge : Option < EdgeFilter > ,
10621096
1097+ /// Used to verify the absence of hash collisions among DepNodes.
1098+ /// This field is only `Some` if the `-Z incremental_verify_ich` option is present.
1099+ ///
1100+ /// The map contains all DepNodes that have been allocated in the current session so far and
1101+ /// for which there is no equivalent in the previous session.
1102+ nodes_newly_allocated_in_current_session : Option < Lock < FxHashSet < DepNode > > > ,
1103+
10631104 /// Anonymous `DepNode`s are nodes whose IDs we compute from the list of
10641105 /// their edges. This has the beneficial side-effect that multiple anonymous
10651106 /// nodes can be coalesced into one without changing the semantics of the
@@ -1081,7 +1122,7 @@ pub(super) struct CurrentDepGraph<D: Deps> {
10811122
10821123impl < D : Deps > CurrentDepGraph < D > {
10831124 fn new (
1084- profiler : & SelfProfilerRef ,
1125+ session : & Session ,
10851126 prev_graph_node_count : usize ,
10861127 encoder : FileEncoder ,
10871128 record_graph : bool ,
@@ -1113,18 +1154,31 @@ impl<D: Deps> CurrentDepGraph<D> {
11131154 prev_graph_node_count,
11141155 record_graph,
11151156 record_stats,
1116- profiler ,
1157+ & session . prof ,
11171158 previous,
11181159 ) ,
1119- new_node_to_index : ShardedHashMap :: with_capacity (
1120- new_node_count_estimate / sharded:: shards ( ) ,
1121- ) ,
1160+ anon_node_to_index : Sharded :: new ( || {
1161+ FxHashMap :: with_capacity_and_hasher (
1162+ new_node_count_estimate / sharded:: shards ( ) ,
1163+ Default :: default ( ) ,
1164+ )
1165+ } ) ,
11221166 prev_index_to_index : Lock :: new ( IndexVec :: from_elem_n ( None , prev_graph_node_count) ) ,
11231167 anon_id_seed,
11241168 #[ cfg( debug_assertions) ]
11251169 forbidden_edge,
11261170 #[ cfg( debug_assertions) ]
11271171 fingerprints : Lock :: new ( IndexVec :: from_elem_n ( None , new_node_count_estimate) ) ,
1172+ nodes_newly_allocated_in_current_session : session
1173+ . opts
1174+ . unstable_opts
1175+ . incremental_verify_ich
1176+ . then ( || {
1177+ Lock :: new ( FxHashSet :: with_capacity_and_hasher (
1178+ new_node_count_estimate,
1179+ Default :: default ( ) ,
1180+ ) )
1181+ } ) ,
11281182 total_read_count : AtomicU64 :: new ( 0 ) ,
11291183 total_duplicate_read_count : AtomicU64 :: new ( 0 ) ,
11301184 }
@@ -1148,13 +1202,19 @@ impl<D: Deps> CurrentDepGraph<D> {
11481202 edges : EdgesVec ,
11491203 current_fingerprint : Fingerprint ,
11501204 ) -> DepNodeIndex {
1151- let dep_node_index = self
1152- . new_node_to_index
1153- . get_or_insert_with ( key, || self . encoder . send ( key, current_fingerprint, edges) ) ;
1205+ let dep_node_index = self . encoder . send ( key, current_fingerprint, edges) ;
11541206
11551207 #[ cfg( debug_assertions) ]
11561208 self . record_edge ( dep_node_index, key, current_fingerprint) ;
11571209
1210+ if let Some ( ref nodes_newly_allocated_in_current_session) =
1211+ self . nodes_newly_allocated_in_current_session
1212+ {
1213+ if !nodes_newly_allocated_in_current_session. lock ( ) . insert ( key) {
1214+ panic ! ( "Found duplicate dep-node {key:?}" ) ;
1215+ }
1216+ }
1217+
11581218 dep_node_index
11591219 }
11601220
@@ -1248,7 +1308,10 @@ impl<D: Deps> CurrentDepGraph<D> {
12481308 ) {
12491309 let node = & prev_graph. index_to_node ( prev_index) ;
12501310 debug_assert ! (
1251- !self . new_node_to_index. get( node) . is_some( ) ,
1311+ !self
1312+ . nodes_newly_allocated_in_current_session
1313+ . as_ref( )
1314+ . map_or( false , |set| set. lock( ) . contains( node) ) ,
12521315 "node from previous graph present in new node collection"
12531316 ) ;
12541317 }
@@ -1370,16 +1433,6 @@ fn panic_on_forbidden_read<D: Deps>(data: &DepGraphData<D>, dep_node_index: DepN
13701433 }
13711434 }
13721435
1373- if dep_node. is_none ( ) {
1374- // Try to find it among the new nodes
1375- for shard in data. current . new_node_to_index . lock_shards ( ) {
1376- if let Some ( ( node, _) ) = shard. iter ( ) . find ( |( _, index) | * index == dep_node_index) {
1377- dep_node = Some ( * node) ;
1378- break ;
1379- }
1380- }
1381- }
1382-
13831436 let dep_node = dep_node. map_or_else (
13841437 || format ! ( "with index {:?}" , dep_node_index) ,
13851438 |dep_node| format ! ( "`{:?}`" , dep_node) ,
0 commit comments