@@ -339,13 +339,11 @@ impl<D: Deps> DepGraphData<D> {
         let (result, edges) = if cx.dep_context().is_eval_always(key.kind) {
             (with_deps(TaskDepsRef::EvalAlways), EdgesVec::new())
         } else {
-            let task_deps = Lock::new(TaskDeps {
+            let task_deps = Lock::new(TaskDeps::new(
                 #[cfg(debug_assertions)]
-                node: Some(key),
-                reads: EdgesVec::new(),
-                read_set: Default::default(),
-                phantom_data: PhantomData,
-            });
+                Some(key),
+                0,
+            ));
             (with_deps(TaskDepsRef::Allow(&task_deps)), task_deps.into_inner().reads)
         };
 
@@ -377,12 +375,18 @@ impl<D: Deps> DepGraphData<D> {
     {
         debug_assert!(!cx.is_eval_always(dep_kind));
 
-        let task_deps = Lock::new(TaskDeps::default());
+        // Large numbers of reads are common enough here that pre-sizing `read_set`
+        // to 128 actually helps perf on some benchmarks.
+        let task_deps = Lock::new(TaskDeps::new(
+            #[cfg(debug_assertions)]
+            None,
+            128,
+        ));
         let result = D::with_deps(TaskDepsRef::Allow(&task_deps), op);
         let task_deps = task_deps.into_inner();
-        let task_deps = task_deps.reads;
+        let reads = task_deps.reads;
 
-        let dep_node_index = match task_deps.len() {
+        let dep_node_index = match reads.len() {
             0 => {
                 // Because the dep-node id of anon nodes is computed from the sets of its
                 // dependencies we already know what the ID of this dependency-less node is
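The pre-sizing comment above relies on a basic property of the hash table: a set created with capacity 128 will not rehash or reallocate while the first 128 distinct reads go in. Here is a minimal sketch of that effect, using std's `HashSet` as a stand-in for rustc's `FxHashSet` (the capacity figure simply mirrors the diff):

```rust
use std::collections::HashSet;

fn main() {
    // Stand-in for `FxHashSet::with_capacity_and_hasher(128, Default::default())`.
    let mut read_set: HashSet<u32> = HashSet::with_capacity(128);
    let initial_capacity = read_set.capacity();

    // Simulate an anon task recording many distinct reads.
    for dep_node_index in 0..128u32 {
        read_set.insert(dep_node_index);
    }

    // `with_capacity(n)` guarantees room for at least `n` elements without
    // reallocating, so filling the pre-sized table never triggered a rehash.
    assert_eq!(read_set.capacity(), initial_capacity);
}
```

By contrast, the instrumented-task path in the first hunk passes 0, so that set typically allocates nothing unless `read_index` later has to spill past the linear-scan threshold.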
@@ -393,7 +397,7 @@ impl<D: Deps> DepGraphData<D> {
             }
             1 => {
                 // When there is only one dependency, don't bother creating a node.
-                task_deps[0]
+                reads[0]
             }
             _ => {
                 // The dep node indices are hashed here instead of hashing the dep nodes of the
@@ -402,7 +406,7 @@ impl<D: Deps> DepGraphData<D> {
                 // combining it with the per session random number `anon_id_seed`. This hash only
                 // needs to map the dependencies to a single value on a per session basis.
                 let mut hasher = StableHasher::new();
-                task_deps.hash(&mut hasher);
+                reads.hash(&mut hasher);
 
                 let target_dep_node = DepNode {
                     kind: dep_kind,
@@ -420,7 +424,7 @@ impl<D: Deps> DepGraphData<D> {
                 // memory impact of this `anon_node_to_index` map remains tolerable, and helps
                 // us avoid useless growth of the graph with almost-equivalent nodes.
                 self.current.anon_node_to_index.get_or_insert_with(target_dep_node, || {
-                    self.current.alloc_new_node(target_dep_node, task_deps, Fingerprint::ZERO)
+                    self.current.alloc_new_node(target_dep_node, reads, Fingerprint::ZERO)
                 })
             }
         };
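The hunks above spell out how an anonymous node gets its identity: the recorded `reads` are hashed together with the per-session `anon_id_seed`, and `anon_node_to_index.get_or_insert_with` collapses anon nodes with identical dependency sets onto a single index. Below is a rough standalone sketch of that flow, with std's `DefaultHasher` and `HashMap` standing in for `StableHasher`, `DepNode`, and the real `anon_node_to_index` map; the helper name and index handling are illustrative, not rustc's API:

```rust
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Hypothetical helper: derive an anon node's identity from its dependency indices
/// plus a per-session seed, then memoize it so equal dependency sets share one index.
fn anon_node_index(
    anon_id_seed: u64,
    reads: &[u32],
    anon_node_to_index: &mut HashMap<u64, u32>,
    next_index: &mut u32,
) -> u32 {
    let mut hasher = DefaultHasher::new();
    anon_id_seed.hash(&mut hasher);
    reads.hash(&mut hasher);
    let target = hasher.finish();

    *anon_node_to_index.entry(target).or_insert_with(|| {
        // Stand-in for `alloc_new_node`: hand out the next fresh index.
        let index = *next_index;
        *next_index += 1;
        index
    })
}

fn main() {
    let mut anon_node_to_index = HashMap::new();
    let mut next_index = 0;
    let a = anon_node_index(0x5eed, &[1, 2, 3], &mut anon_node_to_index, &mut next_index);
    let b = anon_node_index(0x5eed, &[1, 2, 3], &mut anon_node_to_index, &mut next_index);
    assert_eq!(a, b); // identical dependency sets map to the same node index
}
```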
@@ -471,18 +475,17 @@ impl<D: Deps> DepGraph<D> {
                     data.current.total_read_count.fetch_add(1, Ordering::Relaxed);
                 }
 
-                // As long as we only have a low number of reads we can avoid doing a hash
-                // insert and potentially allocating/reallocating the hashmap
-                let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY {
-                    task_deps.reads.iter().all(|other| *other != dep_node_index)
+                // Has `dep_node_index` been seen before? Use either a linear scan or a hashset
+                // lookup to determine this. See `TaskDeps::read_set` for details.
+                let new_read = if task_deps.reads.len() <= TaskDeps::LINEAR_SCAN_MAX {
+                    !task_deps.reads.contains(&dep_node_index)
                 } else {
                     task_deps.read_set.insert(dep_node_index)
                 };
                 if new_read {
                     task_deps.reads.push(dep_node_index);
-                    if task_deps.reads.len() == EdgesVec::INLINE_CAPACITY {
-                        // Fill `read_set` with what we have so far so we can use the hashset
-                        // next time
+                    if task_deps.reads.len() == TaskDeps::LINEAR_SCAN_MAX + 1 {
+                        // Fill `read_set` with what we have so far. Future lookups will use it.
                         task_deps.read_set.extend(task_deps.reads.iter().copied());
                     }
 
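Pulled out of the diff, the new membership check is: scan `reads` linearly while it is short, switch to the hashset once it grows past the threshold, and back-fill the set with everything seen so far at the moment of switching. Here is a self-contained sketch of that strategy, with `Vec` and std's `HashSet` standing in for `EdgesVec` and `FxHashSet` (the threshold value just mirrors the diff):

```rust
use std::collections::HashSet;

struct Deps {
    reads: Vec<u32>,
    read_set: HashSet<u32>,
}

impl Deps {
    const LINEAR_SCAN_MAX: usize = 16;

    fn new() -> Self {
        // Capacity 0: don't allocate the set unless the linear scan limit is exceeded.
        Deps { reads: Vec::new(), read_set: HashSet::with_capacity(0) }
    }

    /// Record one dependency edge, deduplicating the way `read_index` does.
    fn record_read(&mut self, dep_node_index: u32) {
        let new_read = if self.reads.len() <= Self::LINEAR_SCAN_MAX {
            // Few reads so far: a linear scan is cheaper than hashing.
            !self.reads.contains(&dep_node_index)
        } else {
            // Many reads: the hashset answers membership and records the new edge.
            self.read_set.insert(dep_node_index)
        };
        if new_read {
            self.reads.push(dep_node_index);
            if self.reads.len() == Self::LINEAR_SCAN_MAX + 1 {
                // Crossing the threshold: seed the set with everything seen so far.
                self.read_set.extend(self.reads.iter().copied());
            }
        }
    }
}

fn main() {
    let mut deps = Deps::new();
    for dep in [1, 2, 2, 3, 1, 3] {
        deps.record_read(dep);
    }
    assert_eq!(deps.reads, vec![1, 2, 3]); // duplicates are recorded once, in first-seen order
}
```

The old code keyed the switchover to `EdgesVec::INLINE_CAPACITY`; tying it to a named `LINEAR_SCAN_MAX` instead decouples the dedup policy from the vector's inline storage size.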
@@ -1292,18 +1295,30 @@ pub enum TaskDepsRef<'a> {
 pub struct TaskDeps {
     #[cfg(debug_assertions)]
     node: Option<DepNode>,
+
+    /// A vector of `DepNodeIndex`, basically.
     reads: EdgesVec,
+
+    /// When adding new edges to `reads` in `DepGraph::read_index` we need to determine if the edge
+    /// has been seen before. If the number of elements in `reads` is small, we just do a linear
+    /// scan. If the number is higher, a hashset has better perf. This field is that hashset. It's
+    /// only used if the number of elements in `reads` exceeds `LINEAR_SCAN_MAX`.
     read_set: FxHashSet<DepNodeIndex>,
+
     phantom_data: PhantomData<DepNode>,
 }
 
-impl Default for TaskDeps {
-    fn default() -> Self {
-        Self {
+impl TaskDeps {
+    /// See `TaskDeps::read_set` above.
+    const LINEAR_SCAN_MAX: usize = 16;
+
+    #[inline]
+    fn new(#[cfg(debug_assertions)] node: Option<DepNode>, read_set_capacity: usize) -> Self {
+        TaskDeps {
             #[cfg(debug_assertions)]
-            node: None,
+            node,
             reads: EdgesVec::new(),
-            read_set: FxHashSet::with_capacity_and_hasher(128, Default::default()),
+            read_set: FxHashSet::with_capacity_and_hasher(read_set_capacity, Default::default()),
             phantom_data: PhantomData,
         }
     }
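Taken together, the constructor change is: the old `Default` impl always pre-sized `read_set` to 128, while the new `TaskDeps::new` lets each call site pick the capacity it wants (0 for instrumented tasks, 128 for anon tasks). A minimal sketch of that shape, with plain `Vec`/`HashSet` in place of `EdgesVec`/`FxHashSet` and with the debug-only `node` parameter omitted for brevity:

```rust
use std::collections::HashSet;

struct Deps {
    reads: Vec<u32>,
    read_set: HashSet<u32>,
}

impl Deps {
    #[inline]
    fn new(read_set_capacity: usize) -> Self {
        Deps {
            reads: Vec::new(),
            // Mirrors `FxHashSet::with_capacity_and_hasher(read_set_capacity, ...)`.
            read_set: HashSet::with_capacity(read_set_capacity),
        }
    }
}

fn main() {
    // Instrumented-task path: the set is usually never consulted, so don't allocate it.
    let instrumented = Deps::new(0);
    // Anon-task path: many reads are expected, so keep the old pre-sizing.
    let anon = Deps::new(128);
    assert!(instrumented.read_set.capacity() <= anon.read_set.capacity());
    let _ = (instrumented.reads, anon.reads);
}
```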