
Commit 53491ff

[#9023][feat] reduce AD graph optimization time for non-participating passes (#9024)
Shorten AD graph optimization by 30% (measured on Nemotron-6). A bug in the transformation interface marked all passes as not clean, regardless of what each transformation reported. This commit fixes how the optimization passes report the results of their actions: many passes reported the graph as not clean even when they did not participate in the optimization, and each graph-cleaning invocation can take several seconds.
Signed-off-by: Neta Zmora <96238833+nzmora-nvidia@users.noreply.github.com>
1 parent cdde15b commit 53491ff

15 files changed: +86 −44 lines changed

tensorrt_llm/_torch/auto_deploy/transform/interface.py

Lines changed: 2 additions & 2 deletions
@@ -410,14 +410,14 @@ def _apply_per_gm_or_whole_model(
             return self._apply_to_full_model(mod, cm, factory, shared_config)
 
         # just run it on first graph module we are encountering for now...
-        info = TransformInfo()
+        info = None
         for k, graph_sub in named_graphmodules(mod):
             graph_sub, info_apply = self._apply(graph_sub, cm, factory, shared_config)
             if k == "":
                 mod = graph_sub
             else:
                 mod.set_submodule(k, graph_sub)
-            info = info & info_apply
+            info = info & info_apply if info is not None else info_apply
         return mod, info
 
     @final
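The interface change above is the core of the fix: per-submodule results were previously AND-folded into a default-constructed TransformInfo(), so the accumulated flags could never be better than that default. The snippet below is a minimal, self-contained sketch of that failure mode and of the new None-seeded accumulation; FakeTransformInfo and its pessimistic defaults are illustrative assumptions, not the real auto_deploy TransformInfo.

# Illustrative sketch only -- FakeTransformInfo stands in for the real TransformInfo,
# whose actual defaults and __and__ semantics are not shown in this diff.
from dataclasses import dataclass


@dataclass(frozen=True)
class FakeTransformInfo:
    skipped: bool = True
    num_matches: int = 0
    is_clean: bool = False          # assumed pessimistic default
    has_valid_shapes: bool = False  # assumed pessimistic default

    def __and__(self, other: "FakeTransformInfo") -> "FakeTransformInfo":
        # Combining two reports keeps the most conservative view of the graph.
        return FakeTransformInfo(
            skipped=self.skipped and other.skipped,
            num_matches=self.num_matches + other.num_matches,
            is_clean=self.is_clean and other.is_clean,
            has_valid_shapes=self.has_valid_shapes and other.has_valid_shapes,
        )


clean_report = FakeTransformInfo(
    skipped=False, num_matches=0, is_clean=True, has_valid_shapes=True
)

# Old accumulation: the default-constructed seed ANDs its pessimistic flags into the
# result, so even a pass that changed nothing looks dirty.
old = FakeTransformInfo() & clean_report
assert old.is_clean is False

# New accumulation: seed with None and take the first real report as-is.
new = None
for report in [clean_report]:
    new = new & report if new is not None else report
assert new.is_clean is True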

tensorrt_llm/_torch/auto_deploy/transform/library/attention.py

Lines changed: 10 additions & 10 deletions
@@ -304,8 +304,8 @@ def register_repeat_kv(patterns: ADPatternMatcherPass):
         info = TransformInfo(
             skipped=False,
             num_matches=num_kv_patterns,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=num_kv_patterns == 0,
+            has_valid_shapes=num_kv_patterns == 0,
         )
 
         return gm, info
@@ -333,8 +333,8 @@ def register_eager_attention(patterns: ADPatternMatcherPass):
         info = TransformInfo(
             skipped=False,
             num_matches=num_eager_patterns,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=num_eager_patterns == 0,
+            has_valid_shapes=num_eager_patterns == 0,
         )
 
         return gm, info
@@ -647,8 +647,8 @@ def register_sdpa_to_torch_attention(patterns: ADPatternMatcherPass):
         info = TransformInfo(
             skipped=False,
             num_matches=num_patterns,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=num_patterns == 0,
+            has_valid_shapes=num_patterns == 0,
         )
         return gm, info
 
@@ -685,8 +685,8 @@ def register_repeat_kv_with_torch_attention(patterns: ADPatternMatcherPass):
         info = TransformInfo(
             skipped=False,
             num_matches=num_patterns,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=num_patterns == 0,
+            has_valid_shapes=num_patterns == 0,
         )
         return gm, info
 
@@ -870,7 +870,7 @@ def _apply(
         info = TransformInfo(
             skipped=False,
             num_matches=num_matches,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=num_matches == 0,
+            has_valid_shapes=num_matches == 0,
         )
         return gm, info
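The library passes in this commit (above and below) all adopt the same reporting convention: is_clean and has_valid_shapes are true exactly when the pass found zero matches, i.e. an untouched graph stays clean and keeps valid shape metadata. The hedged sketch below shows how a caller can then skip the multi-second cleanup step; count_matches and run_cleanup are hypothetical stand-ins, not auto_deploy APIs.

# Hypothetical stand-ins (count_matches, run_cleanup are not auto_deploy APIs); the
# reporting pattern in apply_pass mirrors the diffs in this commit.


def count_matches(graph) -> int:
    """Stand-in for a pattern matcher; returns how many rewrites were applied."""
    return 0  # pretend this pass did not participate


def run_cleanup(graph) -> None:
    """Stand-in for graph canonicalization / shape re-propagation (seconds per call)."""


def apply_pass(graph):
    num_matches = count_matches(graph)
    info = {
        "skipped": False,
        "num_matches": num_matches,
        "is_clean": num_matches == 0,          # an untouched graph stays clean
        "has_valid_shapes": num_matches == 0,  # shapes only invalidated by real rewrites
    }
    return graph, info


graph, info = apply_pass(object())
if not info["is_clean"]:
    run_cleanup(graph)  # only paid when the pass actually modified the graph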

tensorrt_llm/_torch/auto_deploy/transform/library/cleanup_input_constraints.py

Lines changed: 6 additions & 1 deletion
@@ -52,6 +52,11 @@ def _apply(
             object.__setattr__(vr, "upper", max_total)
 
         # store info object about the transform
-        info = TransformInfo(skipped=False, num_matches=len(vrs))
+        info = TransformInfo(
+            skipped=False,
+            num_matches=len(vrs),
+            is_clean=len(vrs) == 0,
+            has_valid_shapes=len(vrs) == 0,
+        )
 
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/cleanup_noop_add.py

Lines changed: 6 additions & 1 deletion
@@ -51,6 +51,11 @@ def _apply(
             num_matches += 1
 
         # store info object about the transform
-        info = TransformInfo(skipped=False, num_matches=num_matches)
+        info = TransformInfo(
+            skipped=False,
+            num_matches=num_matches,
+            is_clean=num_matches == 0,
+            has_valid_shapes=num_matches == 0,
+        )
 
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/cleanup_noop_slice.py

Lines changed: 6 additions & 1 deletion
@@ -48,6 +48,11 @@ def _apply(
             num_matches += 1
 
         # store info object about the transform
-        info = TransformInfo(skipped=False, num_matches=num_matches)
+        info = TransformInfo(
+            skipped=False,
+            num_matches=num_matches,
+            is_clean=num_matches == 0,
+            has_valid_shapes=num_matches == 0,
+        )
 
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/collectives.py

Lines changed: 4 additions & 1 deletion
@@ -112,6 +112,9 @@ def _apply(
         num_matches = patterns.apply(gm.graph)
 
         info = TransformInfo(
-            skipped=False, num_matches=num_matches, is_clean=False, has_valid_shapes=False
+            skipped=False,
+            num_matches=num_matches,
+            is_clean=num_matches == 0,
+            has_valid_shapes=num_matches == 0,
         )
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/eliminate_redundant_transposes.py

Lines changed: 2 additions & 2 deletions
@@ -117,8 +117,8 @@ def _apply(
         info = TransformInfo(
             skipped=False,
             num_matches=len(nodes_to_eliminate),
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=len(nodes_to_eliminate) == 0,
+            has_valid_shapes=len(nodes_to_eliminate) == 0,
         )
 
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/fuse_quant.py

Lines changed: 4 additions & 4 deletions
@@ -291,8 +291,8 @@ def _apply(
         info = TransformInfo(
             skipped=(cnt == 0),
             num_matches=cnt,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=cnt == 0,
+            has_valid_shapes=cnt == 0,
         )
         return gm, info
 
@@ -333,7 +333,7 @@ def _apply(
         info = TransformInfo(
             skipped=(cnt == 0),
             num_matches=cnt,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=(cnt == 0),
+            has_valid_shapes=(cnt == 0),
         )
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/fused_moe.py

Lines changed: 10 additions & 4 deletions
@@ -510,7 +510,10 @@ def _apply(
             num_moe_patterns += 1
 
         info = TransformInfo(
-            skipped=False, num_matches=num_moe_patterns, is_clean=False, has_valid_shapes=False
+            skipped=False,
+            num_matches=num_moe_patterns,
+            is_clean=num_moe_patterns == 0,
+            has_valid_shapes=num_moe_patterns == 0,
         )
         return gm, info
 
@@ -754,7 +757,10 @@ def _apply(
         fused_key_counter = _insert_fused_moe_ops(gm)
 
         info = TransformInfo(
-            skipped=False, num_matches=fused_key_counter, is_clean=False, has_valid_shapes=False
+            skipped=False,
+            num_matches=fused_key_counter,
+            is_clean=fused_key_counter == 0,
+            has_valid_shapes=fused_key_counter == 0,
         )
         return gm, info
 
@@ -779,7 +785,7 @@ def _apply(
         info = TransformInfo(
             skipped=(fused_key_counter == 0),
             num_matches=fused_key_counter,
-            is_clean=False,
-            has_valid_shapes=False,
+            is_clean=fused_key_counter == 0,
+            has_valid_shapes=fused_key_counter == 0,
         )
         return gm, info

tensorrt_llm/_torch/auto_deploy/transform/library/fusion.py

Lines changed: 8 additions & 2 deletions
@@ -215,7 +215,10 @@ def _apply_fusion_pass(
 
         torch.cuda.empty_cache()
         return gm, TransformInfo(
-            skipped=False, num_matches=num_matches, is_clean=False, has_valid_shapes=False
+            skipped=False,
+            num_matches=num_matches,
+            is_clean=num_matches == 0,
+            has_valid_shapes=num_matches == 0,
         )
 
 
@@ -252,7 +255,10 @@ def _apply(
         torch.cuda.empty_cache()
 
         info = TransformInfo(
-            skipped=False, num_matches=num_matches, is_clean=False, has_valid_shapes=False
+            skipped=False,
+            num_matches=num_matches,
+            is_clean=num_matches == 0,
+            has_valid_shapes=num_matches == 0,
         )
         return gm, info
 
