
Commit 55ca17e

fixed bug in test_decoders passing an extra kwarg for sdpa
Signed-off-by: Joshua Rosenkranz <jmrosenk@us.ibm.com>
1 parent: 1d533ac

File tree

1 file changed: +3 additions, -2 deletions


tests/models/test_decoders.py

Lines changed: 3 additions & 2 deletions
@@ -185,6 +185,7 @@
     ]
 )
 os.environ["VLLM_DT_MAX_BATCH_SIZE"] = str(max(max(common_batch_sizes), 2))
+fx_config.backed_size_oblivious = True
 
 # thresholds are chosen based on 1024 tokens per sequence
 # 1% error threshold rate between cpu fp32 and cuda fp16
@@ -402,7 +403,6 @@ def get_or_create(self, is_gptq, is_fp8, **kwargs):
         self.__maybe_prepare_fp8_weights(model, is_fp8)
 
         model.eval()
-        fx_config.backed_size_oblivious = compile_dynamic_sendnn
         model.compile(
             backend="sendnn", options={"sendnn.dynamic": compile_dynamic_sendnn}
         )
@@ -632,7 +632,8 @@ def _metric_calculator(r: torch.Tensor, t: torch.Tensor):
     )
     extra_kwargs["attn_name"] = ATTN_NAME
     if (
-        "ibm-granite/granite-3.3-8b-instruct" in model_path
+        "paged" in ATTN_NAME
+        and "ibm-granite/granite-3.3-8b-instruct" in model_path
         and USE_DISTRIBUTED
         and dist.get_world_size() == 4
     ):
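For context on the last hunk: the "extra kwarg for sdpa" named in the commit message presumably refers to whatever model-specific entry is added to extra_kwargs inside this if-block, which is only meaningful for a paged attention backend; the added "paged" in ATTN_NAME check keeps it from being set when the test runs with sdpa. Below is a minimal sketch of the resulting gating pattern, not the actual test code: the helper name build_extra_kwargs and the kwarg granite_tp4_override are hypothetical, while the attention-name check, the model path, and the world-size condition mirror the diff.

import torch.distributed as dist


def build_extra_kwargs(model_path: str, attn_name: str, use_distributed: bool) -> dict:
    # Mirrors the gating in the last hunk; "granite_tp4_override" is a placeholder
    # for whatever kwarg the real test sets inside the if-block (not shown in the diff).
    extra_kwargs = {"attn_name": attn_name}
    if (
        "paged" in attn_name
        and "ibm-granite/granite-3.3-8b-instruct" in model_path
        and use_distributed
        and dist.get_world_size() == 4
    ):
        extra_kwargs["granite_tp4_override"] = True  # hypothetical kwarg
    return extra_kwargs


# With sdpa the override is no longer injected:
print(build_extra_kwargs("ibm-granite/granite-3.3-8b-instruct", "sdpa", False))
# -> {'attn_name': 'sdpa'}

The first two hunks make a related cleanup: fx_config.backed_size_oblivious is now set once to True during module-level setup instead of being assigned from compile_dynamic_sendnn just before model.compile.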
