@@ -309,8 +309,6 @@ class MockAttention(torch.nn.Module):
         # static token is not supported
         # channel is not supported
         # group is not supported
-        # tensor group is not supported
-        # block is not supported
         (
             QuantizationArgs(
                 num_bits=4,
@@ -340,6 +338,34 @@ class MockAttention(torch.nn.Module):
             ),
             0.55,
         ),
+        # block is not supported
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="attn_head",
+            ),
+            torch.tensor([[[0.0]], [[12.0]]]),
+            torch.tensor([[[11.0]], [[23.0]]]),
+            torch.tensor(
+                [
+                    [
+                        [
+                            [0.0000, 1.4688, 1.4688, 2.9375],
+                            [4.4062, 4.4062, 5.8750, 7.3438],
+                            [7.3438, 8.8125, 10.2500, 10.2500],
+                        ],
+                        [
+                            [12.2500, 12.2500, 15.3125, 15.3125],
+                            [15.3125, 18.3750, 18.3750, 18.3750],
+                            [21.5000, 21.5000, 21.5000, 21.5000],
+                        ],
+                    ]
+                ]
+            ),
+            0.55,
+        ),
     ],
 )
 def test_static_attention_quantization(
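For context, the added "attn_head" case fake-quantizes a (batch, num_heads, seq_len, head_dim) activation with one symmetric int4 scale per attention head, derived from the per-head min/max tensors in the tuple. Below is a minimal sketch of that mechanic, assuming a symmetric scale of max_abs / ((2**bits - 1) / 2); per_head_scale and fake_quantize are hypothetical helpers, not the library's API, and the expected tensor above additionally reflects scale-dtype rounding that this sketch does not model.

import torch

def per_head_scale(min_vals, max_vals, num_bits=4):
    # min_vals/max_vals are shaped (num_heads, 1, 1), so the resulting scale
    # broadcasts over a (batch, num_heads, seq_len, head_dim) activation.
    bit_range = 2**num_bits - 1
    return torch.maximum(min_vals.abs(), max_vals.abs()) / (bit_range / 2)

def fake_quantize(x, scale, num_bits=4):
    # Symmetric round-to-nearest quantize/dequantize into [-2^(b-1), 2^(b-1) - 1].
    qmin, qmax = -(2 ** (num_bits - 1)), 2 ** (num_bits - 1) - 1
    return torch.clamp(torch.round(x / scale), qmin, qmax) * scale

# Head 0 holds values 0..11 and head 1 holds 12..23, matching the min/max
# tensors in the parametrized case above.
x = torch.arange(24, dtype=torch.float32).reshape(1, 2, 3, 4)
scale = per_head_scale(
    torch.tensor([[[0.0]], [[12.0]]]), torch.tensor([[[11.0]], [[23.0]]])
)
print(fake_quantize(x, scale))  # close to the expected tensor, up to rounding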