11use std:: assert_matches:: assert_matches;
2+ use std:: collections:: hash_map:: Entry ;
23use std:: fmt:: Debug ;
34use std:: hash:: Hash ;
45use std:: marker:: PhantomData ;
@@ -7,15 +8,16 @@ use std::sync::atomic::{AtomicU32, Ordering};
78
89use rustc_data_structures:: fingerprint:: { Fingerprint , PackedFingerprint } ;
910use rustc_data_structures:: fx:: { FxHashMap , FxHashSet } ;
10- use rustc_data_structures:: profiling:: { QueryInvocationId , SelfProfilerRef } ;
11- use rustc_data_structures:: sharded:: { self , ShardedHashMap } ;
11+ use rustc_data_structures:: profiling:: QueryInvocationId ;
12+ use rustc_data_structures:: sharded:: { self , Sharded } ;
1213use rustc_data_structures:: stable_hasher:: { HashStable , StableHasher } ;
1314use rustc_data_structures:: sync:: { AtomicU64 , Lock } ;
1415use rustc_data_structures:: unord:: UnordMap ;
1516use rustc_errors:: DiagInner ;
1617use rustc_index:: IndexVec ;
1718use rustc_macros:: { Decodable , Encodable } ;
1819use rustc_serialize:: opaque:: { FileEncodeResult , FileEncoder } ;
20+ use rustc_session:: Session ;
1921use tracing:: { debug, instrument} ;
2022#[ cfg( debug_assertions) ]
2123use { super :: debug:: EdgeFilter , std:: env} ;
@@ -117,7 +119,7 @@ where
117119
118120impl < D : Deps > DepGraph < D > {
119121 pub fn new (
120- profiler : & SelfProfilerRef ,
122+ session : & Session ,
121123 prev_graph : Arc < SerializedDepGraph > ,
122124 prev_work_products : WorkProductMap ,
123125 encoder : FileEncoder ,
@@ -127,7 +129,7 @@ impl<D: Deps> DepGraph<D> {
127129 let prev_graph_node_count = prev_graph. node_count ( ) ;
128130
129131 let current = CurrentDepGraph :: new (
130- profiler ,
132+ session ,
131133 prev_graph_node_count,
132134 encoder,
133135 record_graph,
@@ -351,12 +353,13 @@ impl<D: Deps> DepGraphData<D> {
351353 // in `DepGraph::try_mark_green()`.
352354 // 2. Two distinct query keys get mapped to the same `DepNode`
353355 // (see for example #48923).
354- assert ! (
355- ! self . dep_node_exists ( & key ) ,
356- "forcing query with already existing `DepNode`\n \
356+ self . assert_dep_node_not_yet_allocated_in_current_session ( & key , || {
357+ format ! (
358+ "forcing query with already existing `DepNode`\n \
357359 - query-key: {arg:?}\n \
358360 - dep-node: {key:?}"
359- ) ;
361+ )
362+ } ) ;
360363
361364 let with_deps = |task_deps| D :: with_deps ( task_deps, || task ( cx, arg) ) ;
362365 let ( result, edges) = if cx. dep_context ( ) . is_eval_always ( key. kind ) {
@@ -436,7 +439,31 @@ impl<D: Deps> DepGraphData<D> {
436439 hash : self . current . anon_id_seed . combine ( hasher. finish ( ) ) . into ( ) ,
437440 } ;
438441
439- self . current . intern_new_node ( target_dep_node, task_deps, Fingerprint :: ZERO )
442+ // The DepNodes generated by the process above are not unique. 2 queries could
443+ // have exactly the same dependencies. However, deserialization does not handle
444+ // duplicated nodes, so we do the deduplication here directly.
445+ //
446+ // As anonymous nodes are a small quantity compared to the full dep-graph, the
447+ // memory impact of this `anon_node_to_index` map remains tolerable, and helps
448+ // us avoid useless growth of the graph with almost-equivalent nodes.
449+ match self
450+ . current
451+ . anon_node_to_index
452+ . get_shard_by_value ( & target_dep_node)
453+ . lock ( )
454+ . entry ( target_dep_node)
455+ {
456+ Entry :: Occupied ( entry) => * entry. get ( ) ,
457+ Entry :: Vacant ( entry) => {
458+ let dep_node_index = self . current . intern_new_node (
459+ target_dep_node,
460+ task_deps,
461+ Fingerprint :: ZERO ,
462+ ) ;
463+ entry. insert ( dep_node_index) ;
464+ dep_node_index
465+ }
466+ }
440467 }
441468 } ;
442469
@@ -637,20 +664,22 @@ impl<D: Deps> DepGraph<D> {
637664}
638665
639666impl < D : Deps > DepGraphData < D > {
640- #[ inline]
641- fn dep_node_index_of_opt ( & self , dep_node : & DepNode ) -> Option < DepNodeIndex > {
667+ fn assert_dep_node_not_yet_allocated_in_current_session < S : std:: fmt:: Display > (
668+ & self ,
669+ dep_node : & DepNode ,
670+ msg : impl FnOnce ( ) -> S ,
671+ ) {
642672 if let Some ( prev_index) = self . previous . node_to_index_opt ( dep_node) {
643- self . current . prev_index_to_index . lock ( ) [ prev_index]
644- } else {
645- self . current . new_node_to_index . get ( dep_node)
673+ let current = self . current . prev_index_to_index . lock ( ) [ prev_index] ;
674+ assert ! ( current. is_none( ) , "{}" , msg( ) )
675+ } else if let Some ( nodes_newly_allocated_in_current_session) =
676+ & self . current . nodes_newly_allocated_in_current_session
677+ {
678+ let seen = nodes_newly_allocated_in_current_session. lock ( ) . contains ( dep_node) ;
679+ assert ! ( !seen, "{}" , msg( ) ) ;
646680 }
647681 }
648682
649- #[ inline]
650- fn dep_node_exists ( & self , dep_node : & DepNode ) -> bool {
651- self . dep_node_index_of_opt ( dep_node) . is_some ( )
652- }
653-
654683 fn node_color ( & self , dep_node : & DepNode ) -> Option < DepNodeColor > {
655684 if let Some ( prev_index) = self . previous . node_to_index_opt ( dep_node) {
656685 self . colors . get ( prev_index)
@@ -734,11 +763,6 @@ impl<D: Deps> DepGraphData<D> {
734763}
735764
736765impl < D : Deps > DepGraph < D > {
737- #[ inline]
738- pub fn dep_node_exists ( & self , dep_node : & DepNode ) -> bool {
739- self . data . as_ref ( ) . is_some_and ( |data| data. dep_node_exists ( dep_node) )
740- }
741-
742766 /// Checks whether a previous work product exists for `v` and, if
743767 /// so, return the path that leads to it. Used to skip doing work.
744768 pub fn previous_work_product ( & self , v : & WorkProductId ) -> Option < WorkProduct > {
@@ -964,6 +988,16 @@ impl<D: Deps> DepGraph<D> {
964988 self . node_color ( dep_node) . is_some_and ( |c| c. is_green ( ) )
965989 }
966990
991+ pub fn assert_dep_node_not_yet_allocated_in_current_session < S : std:: fmt:: Display > (
992+ & self ,
993+ dep_node : & DepNode ,
994+ msg : impl FnOnce ( ) -> S ,
995+ ) {
996+ if let Some ( data) = & self . data {
997+ data. assert_dep_node_not_yet_allocated_in_current_session ( dep_node, msg)
998+ }
999+ }
1000+
9671001 /// This method loads all on-disk cacheable query results into memory, so
9681002 /// they can be written out to the new cache file again. Most query results
9691003 /// will already be in memory but in the case where we marked something as
@@ -1069,24 +1103,24 @@ rustc_index::newtype_index! {
10691103/// largest in the compiler.
10701104///
10711105/// For this reason, we avoid storing `DepNode`s more than once as map
1072- /// keys. The `new_node_to_index ` map only contains nodes not in the previous
1106+ /// keys. The `anon_node_to_index ` map only contains nodes of anonymous queries not in the previous
10731107/// graph, and we map nodes in the previous graph to indices via a two-step
10741108/// mapping. `SerializedDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`,
10751109/// and the `prev_index_to_index` vector (which is more compact and faster than
10761110/// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`.
10771111///
1078- /// This struct uses three locks internally. The `data`, `new_node_to_index `,
1112+ /// This struct uses three locks internally. The `data`, `anon_node_to_index `,
10791113/// and `prev_index_to_index` fields are locked separately. Operations that take
10801114/// a `DepNodeIndex` typically just access the `data` field.
10811115///
10821116/// We only need to manipulate at most two locks simultaneously:
1083- /// `new_node_to_index ` and `data`, or `prev_index_to_index` and `data`. When
1084- /// manipulating both, we acquire `new_node_to_index ` or `prev_index_to_index`
1117+ /// `anon_node_to_index ` and `data`, or `prev_index_to_index` and `data`. When
1118+ /// manipulating both, we acquire `anon_node_to_index ` or `prev_index_to_index`
10851119/// first, and `data` second.
10861120pub ( super ) struct CurrentDepGraph < D : Deps > {
10871121 encoder : GraphEncoder < D > ,
1088- new_node_to_index : ShardedHashMap < DepNode , DepNodeIndex > ,
10891122 prev_index_to_index : Lock < IndexVec < SerializedDepNodeIndex , Option < DepNodeIndex > > > ,
1123+ anon_node_to_index : Sharded < FxHashMap < DepNode , DepNodeIndex > > ,
10901124
10911125 /// This is used to verify that fingerprints do not change between the creation of a node
10921126 /// and its recomputation.
@@ -1098,6 +1132,13 @@ pub(super) struct CurrentDepGraph<D: Deps> {
10981132 #[ cfg( debug_assertions) ]
10991133 forbidden_edge : Option < EdgeFilter > ,
11001134
1135+ /// Used to verify the absence of hash collisions among DepNodes.
1136+ /// This field is only `Some` if the `-Z incremental_verify_ich` option is present.
1137+ ///
1138+ /// The map contains all DepNodes that have been allocated in the current session so far and
1139+ /// for which there is no equivalent in the previous session.
1140+ nodes_newly_allocated_in_current_session : Option < Lock < FxHashSet < DepNode > > > ,
1141+
11011142 /// Anonymous `DepNode`s are nodes whose IDs we compute from the list of
11021143 /// their edges. This has the beneficial side-effect that multiple anonymous
11031144 /// nodes can be coalesced into one without changing the semantics of the
@@ -1119,7 +1160,7 @@ pub(super) struct CurrentDepGraph<D: Deps> {
11191160
11201161impl < D : Deps > CurrentDepGraph < D > {
11211162 fn new (
1122- profiler : & SelfProfilerRef ,
1163+ session : & Session ,
11231164 prev_graph_node_count : usize ,
11241165 encoder : FileEncoder ,
11251166 record_graph : bool ,
@@ -1151,18 +1192,31 @@ impl<D: Deps> CurrentDepGraph<D> {
11511192 prev_graph_node_count,
11521193 record_graph,
11531194 record_stats,
1154- profiler ,
1195+ & session . prof ,
11551196 previous,
11561197 ) ,
1157- new_node_to_index : ShardedHashMap :: with_capacity (
1158- new_node_count_estimate / sharded:: shards ( ) ,
1159- ) ,
1198+ anon_node_to_index : Sharded :: new ( || {
1199+ FxHashMap :: with_capacity_and_hasher (
1200+ new_node_count_estimate / sharded:: shards ( ) ,
1201+ Default :: default ( ) ,
1202+ )
1203+ } ) ,
11601204 prev_index_to_index : Lock :: new ( IndexVec :: from_elem_n ( None , prev_graph_node_count) ) ,
11611205 anon_id_seed,
11621206 #[ cfg( debug_assertions) ]
11631207 forbidden_edge,
11641208 #[ cfg( debug_assertions) ]
11651209 fingerprints : Lock :: new ( IndexVec :: from_elem_n ( None , new_node_count_estimate) ) ,
1210+ nodes_newly_allocated_in_current_session : session
1211+ . opts
1212+ . unstable_opts
1213+ . incremental_verify_ich
1214+ . then ( || {
1215+ Lock :: new ( FxHashSet :: with_capacity_and_hasher (
1216+ new_node_count_estimate,
1217+ Default :: default ( ) ,
1218+ ) )
1219+ } ) ,
11661220 total_read_count : AtomicU64 :: new ( 0 ) ,
11671221 total_duplicate_read_count : AtomicU64 :: new ( 0 ) ,
11681222 }
@@ -1186,13 +1240,19 @@ impl<D: Deps> CurrentDepGraph<D> {
11861240 edges : EdgesVec ,
11871241 current_fingerprint : Fingerprint ,
11881242 ) -> DepNodeIndex {
1189- let dep_node_index = self
1190- . new_node_to_index
1191- . get_or_insert_with ( key, || self . encoder . send ( key, current_fingerprint, edges) ) ;
1243+ let dep_node_index = self . encoder . send ( key, current_fingerprint, edges) ;
11921244
11931245 #[ cfg( debug_assertions) ]
11941246 self . record_edge ( dep_node_index, key, current_fingerprint) ;
11951247
1248+ if let Some ( ref nodes_newly_allocated_in_current_session) =
1249+ self . nodes_newly_allocated_in_current_session
1250+ {
1251+ if !nodes_newly_allocated_in_current_session. lock ( ) . insert ( key) {
1252+ panic ! ( "Found duplicate dep-node {key:?}" ) ;
1253+ }
1254+ }
1255+
11961256 dep_node_index
11971257 }
11981258
@@ -1286,7 +1346,10 @@ impl<D: Deps> CurrentDepGraph<D> {
12861346 ) {
12871347 let node = & prev_graph. index_to_node ( prev_index) ;
12881348 debug_assert ! (
1289- !self . new_node_to_index. get( node) . is_some( ) ,
1349+ !self
1350+ . nodes_newly_allocated_in_current_session
1351+ . as_ref( )
1352+ . map_or( false , |set| set. lock( ) . contains( node) ) ,
12901353 "node from previous graph present in new node collection"
12911354 ) ;
12921355 }
@@ -1408,16 +1471,6 @@ fn panic_on_forbidden_read<D: Deps>(data: &DepGraphData<D>, dep_node_index: DepN
14081471 }
14091472 }
14101473
1411- if dep_node. is_none ( ) {
1412- // Try to find it among the new nodes
1413- for shard in data. current . new_node_to_index . lock_shards ( ) {
1414- if let Some ( ( node, _) ) = shard. iter ( ) . find ( |( _, index) | * index == dep_node_index) {
1415- dep_node = Some ( * node) ;
1416- break ;
1417- }
1418- }
1419- }
1420-
14211474 let dep_node = dep_node. map_or_else (
14221475 || format ! ( "with index {:?}" , dep_node_index) ,
14231476 |dep_node| format ! ( "`{:?}`" , dep_node) ,
0 commit comments