Add Method to check if output has valid shape for perturbations per eval. (meta-pytorch#1666)

cyrjano · facebook-github-bot · commit 3804e56f0afd · 2025-11-13T20:00:37.000-08:00
Summary:

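This diff adds a new module-level function, `check_output_shape_valid`, to the `captum.attr._core.feature_ablation` module. It checks that the output shape of the forward function scales correctly with the input batch size when multiple perturbations are evaluated at once (`perturbations_per_eval > 1`). The function takes the inputs, the number of examples, the initial evaluation, the modified evaluation, and the number of perturbations per evaluation as arguments, and raises an `AssertionError` if the first dimension of the output does not grow in proportion to the input batch size. The check previously lived inside `FeatureAblation._process_ablated_out_full`; it is now invoked by the callers of that method, and the `_parse_forward_out` calls are moved to the callers as well.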

Differential Revision: D86976520
diff --git a/captum/attr/_core/feature_ablation.py b/captum/attr/_core/feature_ablation.py
@@ -90,8 +90,6 @@ def process_initial_eval(
     use_weights: bool = False,
 ) -> Tuple[List[Tensor], List[Tensor], Tensor, Tensor, int, dtype]:
 
-    initial_eval = _parse_forward_out(initial_eval)
-
     # number of elements in the output of forward_func
     n_outputs = initial_eval.numel()
 
@@ -153,6 +151,74 @@ def format_result(
     return _format_output(is_inputs_tuple, attrib)
 
 
+def check_output_shape_valid(
+    inputs: TensorOrTupleOfTensorsGeneric,
+    num_examples: int,
+    initial_eval: Tensor,
+    modified_eval: Tensor,
+    perturbations_per_eval: int,
+) -> None:
+    """
+    Validate that the forward function's output shape scales correctly with
+    the input batch size when perturbations_per_eval > 1.
+
+    When multiple perturbations are evaluated simultaneously
+    (perturbations_per_eval > 1),
+    the forward function must return outputs whose first dimension grows proportionally
+    with the input batch size. This ensures the forward function is not aggregating
+    results across the batch, which would prevent correct attribution calculation.
+
+    Args:
+        inputs (Tensor or tuple[Tensor, ...]): Input tensors used for evaluation.
+                    inputs[0].shape[0] is read as the current batch size (a bare
+                    Tensor would be indexed along its first dim instead).
+        num_examples (int): The original number of examples (batch size) before
+                    expansion for perturbations.
+        initial_eval (Tensor): Output from forward function with original batch size
+                    (perturbations_per_eval = 1). Used as baseline for shape comparison.
+        modified_eval (Tensor): Output from forward function with expanded batch size
+                    (batch_size = num_examples * n_perturb).
+        perturbations_per_eval (int): Number of perturbations processed simultaneously.
+                    Validation only occurs when this value is greater than 1.
+
+    Raises:
+        AssertionError: If perturbations_per_eval > 1 and either output is a scalar
+                    (empty shape), the current batch size is not a multiple of
+                    num_examples, or modified_eval.shape[0] is not equal to
+                    n_perturb * initial_eval.shape[0] (n_perturb = batch-size ratio).
+    """
+
+    if perturbations_per_eval > 1:
+        # if perturbations_per_eval > 1, the output shape must grow with
+        # the input and must not be aggregated across the batch
+        current_batch_size = inputs[0].shape[0]
+
+        # number of perturbations in this batch, which is not the same as
+        # perturbations_per_eval when there are not enough features to perturb
+        n_perturb: int = current_batch_size // num_examples
+        mod_perturb: int = current_batch_size % num_examples  # remainder must be 0
+        current_output_shape = modified_eval.shape
+
+        # use initial_eval as the forward of perturbations_per_eval = 1
+        initial_output_shape = initial_eval.shape
+
+        assert (
+            # check that neither output is a scalar (an empty shape is falsy)
+            current_output_shape
+            and initial_output_shape
+            and mod_perturb == 0
+            # check that the output grows by the same ratio, i.e., is not aggregated
+            and current_output_shape[0] == n_perturb * initial_output_shape[0]
+        ), (
+            "When perturbations_per_eval > 1, forward_func's output "
+            "should be a tensor whose 1st dim grow with the input "
+            f"batch size: when input batch size is {num_examples}, "
+            f"the output shape is {initial_output_shape}; "
+            f"when input batch size is {current_batch_size}, "
+            f"the output shape is {current_output_shape}"
+        )
+
+
 class FeatureAblation(PerturbationAttribution):
     """
     A perturbation based approach to computing attribution, involving
@@ -395,7 +461,7 @@ def attribute(
         """
         # Keeps track whether original input is a tuple or not before
         # converting it into a tuple. We return the attribution as tuple in the
-        # end if the inputs where tuple.
+        # end if the inputs were a tuple.
         is_inputs_tuple = _is_tuple(inputs)
 
         formatted_inputs, baselines = _format_input_baseline(inputs, baselines)
@@ -443,7 +509,7 @@ def attribute(
                     "when using the attribute function, initial_eval should have "
                     f"non-Future type rather than {type(initial_eval)}"
                 )
-
+            initial_eval = _parse_forward_out(initial_eval)
             (
                 total_attrib,
                 weights,
@@ -581,26 +647,33 @@ def _attribute_with_cross_tensor_feature_masks(
                 current_target,
                 current_additional_args,
             )
+            modified_eval = _parse_forward_out(modified_eval)
 
             attr_progress.update()
 
             assert not isinstance(modified_eval, torch.Future), (
                 "when use_futures is True, modified_eval should have "
                 f"non-Future type rather than {type(modified_eval)}"
             )
-
+            # Just do the check once.
+            if not self._is_output_shape_valid:
+                check_output_shape_valid(
+                    inputs=current_inputs,
+                    num_examples=num_examples,
+                    initial_eval=initial_eval,
+                    modified_eval=modified_eval,
+                    perturbations_per_eval=perturbations_per_eval,
+                )
+                self._is_output_shape_valid = True
             total_attrib, weights = self._process_ablated_out_full(
-                modified_eval,
-                current_masks,
-                flattened_initial_eval,
-                initial_eval,
-                current_inputs,
-                n_outputs,
-                num_examples,
-                total_attrib,
-                weights,
-                attrib_type,
-                perturbations_per_eval,
+                modified_eval=modified_eval,
+                current_mask=current_masks,
+                flattened_initial_eval=flattened_initial_eval,
+                inputs=current_inputs,
+                n_outputs=n_outputs,
+                total_attrib=total_attrib,
+                weights=weights,
+                attrib_type=attrib_type,
             )
         return total_attrib, weights
 
@@ -705,6 +778,7 @@ def _initial_eval_to_processed_initial_eval_fut(
                     "initial_eval_to_processed_initial_eval_fut: "
                     "initial_eval should be a Tensor"
                 )
+            initial_eval_processed = _parse_forward_out(initial_eval_processed)
             result = process_initial_eval(
                 initial_eval_processed, formatted_inputs, use_weights=self.use_weights
             )
@@ -1039,6 +1113,7 @@ def _eval_fut_to_ablated_out_fut_cross_tensor(
                     "total_attrib, weights, initial_eval, "
                     "flattened_initial_eval, n_outputs, attrib_type "
                 )
+            modified_eval = _parse_forward_out(modified_eval)
             if not isinstance(modified_eval, Tensor):
                 raise AssertionError(
                     "_eval_fut_to_ablated_out_fut_cross_tensor: "
@@ -1052,13 +1127,21 @@ def _eval_fut_to_ablated_out_fut_cross_tensor(
                 n_outputs,
                 attrib_type,
             ) = initial_eval_tuple
+            # Just do the check once.
+            if not self._is_output_shape_valid:
+                check_output_shape_valid(
+                    inputs=current_inputs,
+                    num_examples=num_examples,
+                    initial_eval=initial_eval,
+                    modified_eval=modified_eval,
+                    perturbations_per_eval=perturbations_per_eval,
+                )
+                self._is_output_shape_valid = True
+
             total_attrib, weights = self._process_ablated_out_full(
                 modified_eval=modified_eval,
                 inputs=current_inputs,
                 current_mask=current_mask,
-                perturbations_per_eval=perturbations_per_eval,
-                num_examples=num_examples,
-                initial_eval=initial_eval,
                 flattened_initial_eval=flattened_initial_eval,
                 n_outputs=n_outputs,
                 total_attrib=total_attrib,
@@ -1076,47 +1159,12 @@ def _process_ablated_out_full(
         modified_eval: Tensor,
         current_mask: Tuple[Optional[Tensor], ...],
         flattened_initial_eval: Tensor,
-        initial_eval: Tensor,
         inputs: TensorOrTupleOfTensorsGeneric,
         n_outputs: int,
-        num_examples: int,
         total_attrib: List[Tensor],
         weights: List[Tensor],
         attrib_type: dtype,
-        perturbations_per_eval: int,
     ) -> Tuple[List[Tensor], List[Tensor]]:
-        modified_eval = _parse_forward_out(modified_eval)
-        # if perturbations_per_eval > 1, the output shape must grow with
-        # input and not be aggregated
-        current_batch_size = inputs[0].shape[0]
-
-        # number of perturbation, which is not the same as
-        # perturbations_per_eval when not enough features to perturb
-        n_perturb = current_batch_size / num_examples
-        if perturbations_per_eval > 1 and not self._is_output_shape_valid:
-
-            current_output_shape = modified_eval.shape
-
-            # use initial_eval as the forward of perturbations_per_eval = 1
-            initial_output_shape = initial_eval.shape
-
-            assert (
-                # check if the output is not a scalar
-                current_output_shape
-                and initial_output_shape
-                # check if the output grow in same ratio, i.e., not agg
-                and current_output_shape[0] == n_perturb * initial_output_shape[0]
-            ), (
-                "When perturbations_per_eval > 1, forward_func's output "
-                "should be a tensor whose 1st dim grow with the input "
-                f"batch size: when input batch size is {num_examples}, "
-                f"the output shape is {initial_output_shape}; "
-                f"when input batch size is {current_batch_size}, "
-                f"the output shape is {current_output_shape}"
-            )
-
-            self._is_output_shape_valid = True
-
         # reshape the leading dim for n_feature_perturbed
         # flatten each feature's eval outputs into 1D of (n_outputs)
         modified_eval = modified_eval.reshape(-1, n_outputs)
diff --git a/tests/attr/test_feature_ablation.py b/tests/attr/test_feature_ablation.py
@@ -14,6 +14,7 @@
 from captum._utils.typing import BaselineType, TargetType, TensorOrTupleOfTensorsGeneric
 from captum.attr._core.feature_ablation import (
     _parse_forward_out,
+    check_output_shape_valid,
     FeatureAblation,
     format_result,
 )
@@ -936,8 +937,10 @@ def test_parse_forward_out_invalid_none(self) -> None:
 class TestFormatResult(BaseTest):
 
     def test_format_result_single_tensor_no_weights(self) -> None:
-        total_attrib = [torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])]
-        weights = []
+        total_attrib: list[torch.Tensor] = [
+            torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        ]
+        weights: list[torch.Tensor] = []
         is_inputs_tuple = False
         use_weights = False
 
@@ -951,11 +954,11 @@ def test_format_result_single_tensor_no_weights(self) -> None:
         )
 
     def test_format_result_tuple_output_no_weights(self) -> None:
-        total_attrib = [
+        total_attrib: list[torch.Tensor] = [
             torch.tensor([[1.0, 2.0], [3.0, 4.0]]),
             torch.tensor([[5.0, 6.0], [7.0, 8.0]]),
         ]
-        weights = []
+        weights: list[torch.Tensor] = []
         is_inputs_tuple = True
         use_weights = False
 
@@ -967,8 +970,12 @@ def test_format_result_tuple_output_no_weights(self) -> None:
         assertTensorAlmostEqual(self, result[1], torch.tensor([[5.0, 6.0], [7.0, 8.0]]))
 
     def test_format_result_single_tensor_with_weights(self) -> None:
-        total_attrib = [torch.tensor([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]])]
-        weights = [torch.tensor([[2.0, 4.0, 5.0], [8.0, 10.0, 12.0]])]
+        total_attrib: list[torch.Tensor] = [
+            torch.tensor([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]])
+        ]
+        weights: list[torch.Tensor] = [
+            torch.tensor([[2.0, 4.0, 5.0], [8.0, 10.0, 12.0]])
+        ]
         is_inputs_tuple = False
         use_weights = True
 
@@ -979,11 +986,11 @@ def test_format_result_single_tensor_with_weights(self) -> None:
         assertTensorAlmostEqual(self, result, expected)
 
     def test_format_result_tuple_output_with_weights(self) -> None:
-        total_attrib = [
+        total_attrib: list[torch.Tensor] = [
             torch.tensor([[10.0, 20.0], [30.0, 40.0]]),
             torch.tensor([[50.0, 60.0], [70.0, 80.0]]),
         ]
-        weights = [
+        weights: list[torch.Tensor] = [
             torch.tensor([[2.0, 4.0], [5.0, 8.0]]),
             torch.tensor([[10.0, 12.0], [14.0, 16.0]]),
         ]
@@ -998,8 +1005,10 @@ def test_format_result_tuple_output_with_weights(self) -> None:
         assertTensorAlmostEqual(self, result[1], torch.tensor([[5.0, 5.0], [5.0, 5.0]]))
 
     def test_format_result_integer_dtype_no_weights(self) -> None:
-        total_attrib = [torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int32)]
-        weights = []
+        total_attrib: list[torch.Tensor] = [
+            torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int32)
+        ]
+        weights: list[torch.Tensor] = []
         is_inputs_tuple = False
         use_weights = False
 
@@ -1013,5 +1022,69 @@ def test_format_result_integer_dtype_no_weights(self) -> None:
         )
 
 
+class TestCheckOutputShapeValid(BaseTest):
+    def test_valid_output_shape_scaling(self) -> None:
+        inputs = (torch.randn(4, 3),)  # expanded batch of 4 rows
+        num_examples = 2  # so n_perturb = 4 // 2 = 2
+        initial_eval = torch.randn(2, 5)
+        modified_eval = torch.randn(4, 5)  # 4 == 2 * 2: valid, no error expected
+        perturbations_per_eval = 2
+
+        check_output_shape_valid(
+            inputs=inputs,
+            num_examples=num_examples,
+            initial_eval=initial_eval,
+            modified_eval=modified_eval,
+            perturbations_per_eval=perturbations_per_eval,
+        )
+
+    def test_invalid_output_shape_scaling(self) -> None:
+        inputs = (torch.randn(4, 3),)
+        num_examples = 2
+        initial_eval = torch.randn(2, 5)
+        modified_eval = torch.randn(6, 5)  # 6 != 2 * 2: first dim does not scale
+        perturbations_per_eval = 2
+
+        with self.assertRaises(AssertionError):
+            check_output_shape_valid(
+                inputs=inputs,
+                num_examples=num_examples,
+                initial_eval=initial_eval,
+                modified_eval=modified_eval,
+                perturbations_per_eval=perturbations_per_eval,
+            )
+
+    def test_skip_validation_when_perturbations_per_eval_is_one(self) -> None:
+        inputs = (torch.randn(4, 3),)
+        num_examples = 2
+        initial_eval = torch.randn(2, 5)
+        modified_eval = torch.randn(3, 5)  # mismatched on purpose
+        perturbations_per_eval = 1  # check is skipped, so no error is expected
+
+        check_output_shape_valid(
+            inputs=inputs,
+            num_examples=num_examples,
+            initial_eval=initial_eval,
+            modified_eval=modified_eval,
+            perturbations_per_eval=perturbations_per_eval,
+        )
+
+    def test_invalid_batch_size_not_divisible_by_num_examples(self) -> None:
+        inputs = (torch.randn(5, 3),)  # 5 % 2 != 0: batch is not a whole multiple
+        num_examples = 2
+        initial_eval = torch.randn(2, 5)
+        modified_eval = torch.randn(5, 5)
+        perturbations_per_eval = 2
+
+        with self.assertRaises(AssertionError):
+            check_output_shape_valid(
+                inputs=inputs,
+                num_examples=num_examples,
+                initial_eval=initial_eval,
+                modified_eval=modified_eval,
+                perturbations_per_eval=perturbations_per_eval,
+            )
+
+
 if __name__ == "__main__":
     unittest.main()