mind-inria · jpaillard · Dec 5, 2025 · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025
diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py
@@ -86,9 +86,7 @@
 dl.fit_importance(X_init, y)
 
 # compute estimated support (first method)
-selected_dl = (dl.pvalues_ < (fwer_target / 2) / n_features) | (
-    dl.one_minus_pvalues_ < (fwer_target / 2) / n_features
-)
+selected_dl = dl.fwer_selection(fwer=fwer_target, n_tests=n_features)
 
 tp_mask = ((selected_dl.astype(int) == 1) & (beta == 1)).astype(bool)
 fp_mask = ((selected_dl.astype(int) == 1) & (beta == 0)).astype(bool)
@@ -151,9 +149,7 @@
 clu_dl = CluDL(desparsified_lasso=dl_2, clustering=clustering, random_state=0)
 clu_dl.fit_importance(X_init, y)
 
-selected_cdl = (clu_dl.pvalues_ < (fwer_target / 2) / n_clusters) | (
-    clu_dl.one_minus_pvalues_ < (fwer_target / 2) / n_clusters
-)
+selected_cdl = clu_dl.fwer_selection(fwer=fwer_target, n_tests=n_clusters)
 
 
 # %%
@@ -211,9 +207,7 @@
 )
 enclu_dl.fit_importance(X_init, y)
 
-selected_ecdl = (enclu_dl.pvalues_ < (fwer_target / 2) / n_clusters) | (
-    enclu_dl.one_minus_pvalues_ < (fwer_target / 2) / n_clusters
-)
+selected_ecdl = enclu_dl.fwer_selection(fwer=fwer_target, n_tests=n_clusters)
 
 
 # %%

diff --git a/examples/plot_digits.py b/examples/plot_digits.py
@@ -130,17 +130,19 @@
 )
 
 encludl.fit_importance(X_4_7, y_4_7)
-selected_pos_4_7 = encludl.pvalues_ < fwer / 2 / n_clusters
-selected_neg_4_7 = encludl.one_minus_pvalues_ < fwer / 2 / n_clusters
+selected_4_7 = encludl.fwer_selection(
+    fwer=fwer, n_tests=n_clusters, two_tailed_test=True
+)
 
 encludl.fit_importance(X_0_1, y_0_1)
-selected_pos_0_1 = encludl.pvalues_ < fwer / 2 / n_clusters
-selected_neg_0_1 = encludl.one_minus_pvalues_ < fwer / 2 / n_clusters
+selected_0_1 = encludl.fwer_selection(
+    fwer=fwer, n_tests=n_clusters, two_tailed_test=True
+)
 
 encludl.fit_importance(X_0_9, y_0_9)
-selected_pos_0_9 = encludl.pvalues_ < fwer / 2 / n_clusters
-selected_neg_0_9 = encludl.one_minus_pvalues_ < fwer / 2 / n_clusters
-
+selected_0_9 = encludl.fwer_selection(
+    fwer=fwer, n_tests=n_clusters, two_tailed_test=True
+)
 
 # %%
 # Visualizing the results
@@ -153,16 +155,14 @@
 
 _, axes = plt.subplots(1, 3, figsize=(5, 2), subplot_kw={"xticks": [], "yticks": []})
 
-for i, (title, selected_pos, selected_neg) in enumerate(
+for i, (title, selected) in enumerate(
     [
-        ("4 vs 7", selected_pos_4_7, selected_neg_4_7),
-        ("0 vs 1", selected_pos_0_1, selected_neg_0_1),
-        ("0 vs 9", selected_pos_0_9, selected_neg_0_9),
+        ("4 vs 7", selected_4_7),
+        ("0 vs 1", selected_0_1),
+        ("0 vs 9", selected_0_9),
     ]
 ):
-    mask_encludl = np.zeros(shape)
-    mask_encludl[selected_pos.reshape(shape)] = 1
-    mask_encludl[selected_neg.reshape(shape)] = -1
+    mask_encludl = selected.reshape(shape)
 
     cmap = ListedColormap(["tab:red", "white", "tab:blue"])
     axes[i].imshow(mask_encludl, cmap=cmap, vmin=-1, vmax=1)

diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py
@@ -126,7 +126,6 @@ def preprocess_haxby(subject=2, memory=None):
 # Making the inference with several algorithms
 # --------------------------------------------
 
-
 from sklearn.linear_model import LassoCV
 from sklearn.model_selection import KFold
 
@@ -173,6 +172,7 @@ def preprocess_haxby(subject=2, memory=None):
 )
 cludl.fit_importance(X, y)
 
+
 # %%
 # Below, we run the ensemble clustered inference algorithm which adds a
 # randomization step over the clustered inference algorithm (c.f. References).
@@ -182,7 +182,6 @@ def preprocess_haxby(subject=2, memory=None):
 # However you might benefit from clustering randomization taking
 # `n_bootstraps=25` or `n_bootstraps=100`, also we set `n_jobs=n_jobs`.
 
-
 from hidimstat.ensemble_clustered_inference import EnCluDL
 
 encludl = EnCluDL(
@@ -195,6 +194,7 @@ def preprocess_haxby(subject=2, memory=None):
 )
 encludl.fit_importance(X, y)
 
+
 # %%
 # Plotting the results
 # --------------------
@@ -210,27 +210,6 @@ def preprocess_haxby(subject=2, memory=None):
 n_samples, n_features = X.shape
 target_fwer = 0.1
 
-# %%
-# We now translate the FWER target into a z-score target.
-# For the permutation test methods we do not need any additional correction
-# since the p-values are already adjusted for multiple testing.
-
-zscore_threshold_corr = zscore_from_pval((target_fwer / 2))
-
-# %%
-# Other methods need to be corrected. We consider the Bonferroni correction.
-# For methods that do not reduce the feature space, the correction
-# consists in dividing by the number of features.
-
-correction = 1.0 / n_features
-zscore_threshold_no_clust = zscore_from_pval((target_fwer / 2) * correction)
-
-# %%
-# For methods that parcelates the brain into groups of voxels, the correction
-# consists in dividing by the number of parcels (or clusters).
-
-correction_clust = 1.0 / n_clusters
-zscore_threshold_clust = zscore_from_pval((target_fwer / 2) * correction_clust)
 
 # %%
 # Now, we can plot the thresholded z-score maps by translating the
@@ -241,7 +220,8 @@ def preprocess_haxby(subject=2, memory=None):
 
 from matplotlib.pyplot import get_cmap
 from nilearn.plotting import plot_stat_map, show
-from sklearn.preprocessing import StandardScaler
+
+from hidimstat.statistical_tools.p_values import zscore_from_pval
 
 
 def plot_map(
@@ -268,20 +248,25 @@ def plot_map(
     )
 
 
+selected_cludl = cludl.fwer_selection(fwer=target_fwer, two_tailed_test=True)
 plot_map(
-    zscore_from_pval(cludl.pvalues_, cludl.one_minus_pvalues_),
-    float(zscore_threshold_clust),
-    "CluDL",
+    zscore_from_pval(cludl.pvalues_) * selected_cludl,
+    float(zscore_from_pval(target_fwer / 2 / n_clusters)),
+    title="CluDL",
 )
 
-selected = encludl.pvalues_ < target_fwer / 2 / n_clusters
-selected = selected.astype(int)
-selected[(encludl.one_minus_pvalues_ < target_fwer / 2 / n_clusters)] = -1
-plot_map(selected, 0.5, "EnCluDL", vmin=-1, vmax=1)
+selected_encludl = encludl.fwer_selection(fwer=target_fwer, two_tailed_test=True)
+z_score_encludl = zscore_from_pval(encludl.pvalues_) * selected_encludl
+plot_map(
+    z_score_encludl,
+    float(zscore_from_pval(target_fwer / 2 / n_clusters)),
+    "EnCluDL",
+)
 # Finally, calling plotting.show() is necessary to display the figure when
 # running as a script outside IPython
 show()
 
+
 # %%
 # Analysis of the results
 # -----------------------

diff --git a/src/hidimstat/base_variable_importance.py b/src/hidimstat/base_variable_importance.py
@@ -7,6 +7,7 @@
 
 from hidimstat._utils.exception import InternalError
 from hidimstat.statistical_tools.multiple_testing import fdr_threshold
+from hidimstat.statistical_tools.p_values import pval_from_two_sided_pval_and_sign
 
 
 def _selection_generic(
@@ -114,9 +115,6 @@ class BaseVariableImportance(BaseEstimator):
         The computed importance scores for each feature.
     pvalues_ : array-like of shape (n_features,), default=None
         The computed p-values for each feature.
-    one_minus_pvalues_: ndarray of shape (n_features)
-        One minus the corrected p-value, with numerically accurate values for negative
-        effects (ie., for p-value close to one).
 
     Methods
     -------
@@ -131,7 +129,6 @@ def __init__(self):
         super().__init__()
         self.importances_ = None
         self.pvalues_ = None
-        self.one_minus_pvalues_ = None
 
     def _check_importance(self):
         """
@@ -230,7 +227,8 @@ def fdr_selection(
         fdr,
         fdr_control="bhq",
         reshaping_function=None,
-        alternative_hypothesis=False,
+        two_tailed_test=False,
+        eps=1e-14,
     ):
         """
         Performs feature selection based on False Discovery Rate (FDR) control.
@@ -246,16 +244,20 @@ def fdr_selection(
         reshaping_function: callable or None, default=None
             Optional reshaping function for FDR control methods.
             If None, defaults to sum of reciprocals for 'bhy'.
-        alternative_hippothesis: bool or None, default=False
-            If False, selects features with small p-values.
-            If True, selects features with large p-values (close to 1).
-            If None, selects features that have either small or large p-values.
+        two_tailed_test: bool, default=False
+            If True, performs two-tailed test selection using both p-values
+            for positive effects and one-minus p-values for negative effects. The sign
+            of the effect is determined from the sign of the importance scores.
+        eps : float, default=1e-14
+            Small value to ensure numerical stability when computing one-minus p-values.
 
         Returns
         -------
-        selected : ndarray of bool
-            Boolean mask of selected features.
-            True indicates selected features, False indicates non-selected features.
+        selected : ndarray of int
+            Integer array indicating the selected features.
+            1 indicates selected features with positive effects,
+            -1 indicates selected features with negative effects,
+            0 indicates non-selected features.
 
         Raises
         ------
@@ -272,39 +274,82 @@ def fdr_selection(
         assert (
             fdr_control == "bhq" or fdr_control == "bhy"
         ), "only 'bhq' and 'bhy' are supported"
-        assert alternative_hypothesis is None or isinstance(
-            alternative_hypothesis, bool
-        ), "alternative_hippothesis can have only three values: True, False and None."
 
-        # selection on pvalue
-        if alternative_hypothesis is None or not alternative_hypothesis:
-            threshold_pvalues = fdr_threshold(
-                self.pvalues_,
-                fdr=fdr,
-                method=fdr_control,
-                reshaping_function=reshaping_function,
-            )
-            selected_pvalues = self.pvalues_ <= threshold_pvalues
-        else:
-            selected_pvalues = np.zeros_like(self.pvalues_, dtype=bool)
-
-        # selection on 1-pvalue
-        if alternative_hypothesis is None or alternative_hypothesis:
-            threshold_one_minus_pvalues = fdr_threshold(
-                self.one_minus_pvalues_,
-                fdr=fdr,
-                method=fdr_control,
-                reshaping_function=reshaping_function,
-            )
-            selected_one_minus_pvalues = (
-                self.one_minus_pvalues_
-            ) <= threshold_one_minus_pvalues
-        else:
-            selected_one_minus_pvalues = np.zeros_like(self.pvalues_, dtype=bool)
+        # Adjust fdr for two-tailed test
+        if two_tailed_test:
+            fdr = fdr / 2
+
+        threshold_pvalues = fdr_threshold(
+            self.pvalues_,
+            fdr=fdr,
+            method=fdr_control,
+            reshaping_function=reshaping_function,
+        )
+        selected = (self.pvalues_ <= threshold_pvalues).astype(int)
+
+        # For two-tailed test, determine the sign of the effect
+        if two_tailed_test:
+            if self.importances_.ndim > 1:
+                sign_beta = np.sign(self.importances_.sum(axis=1))
+            else:
+                sign_beta = np.sign(self.importances_)
+            selected = selected * sign_beta
 
-        selected = selected_pvalues | selected_one_minus_pvalues
         return selected
 
+    def fwer_selection(
+        self, fwer, procedure="bonferroni", n_tests=None, two_tailed_test=False
+    ):
+        """
+        Performs feature selection based on Family-Wise Error Rate (FWER) control.
+
+        Parameters
+        ----------
+        fwer : float
+            The target family-wise error rate level (between 0 and 1)
+        procedure : {'bonferroni'}, default='bonferroni'
+            The FWER control method to use:
+            - 'bonferroni': Bonferroni correction
+        n_tests : int or None, default=None
+            Factor for multiple testing correction. If None, uses the number of clusters
+            or the number of features in this order.
+        two_tailed_test : bool, default=False
+            If True, uses the sign of the importance scores to indicate whether the
+            selected features have positive or negative effects.
+
+        Returns
+        -------
+        selected : ndarray of int
+            Integer array indicating the selected features.
+            1 indicates selected features with positive effects,
+            -1 indicates selected features with negative effects,
+            0 indicates non-selected features.
+        """
+        self._check_importance()
+
+        if procedure == "bonferroni":
+            if n_tests is None:
+                if hasattr(self, "clustering_"):
+                    print("Using number of clusters for multiple testing correction.")
+                    n_tests = self.clustering_.n_clusters_
+                else:
+                    print("Using number of features for multiple testing correction.")
+                    n_tests = self.importances_.shape[0]
+
+            # Adjust fwer for two-tailed test
+            if two_tailed_test:
+                fwer = fwer / 2
+
+            threshold_pvalue = fwer / n_tests
+            selected = (self.pvalues_ < threshold_pvalue).astype(int)
+            if two_tailed_test:
+                sign_beta = np.sign(self.importances_)
+                selected = selected * sign_beta
+            return selected
+
+        else:
+            raise ValueError("Only 'bonferroni' procedure is supported")
+
     def plot_importance(
         self,
         ax=None,