Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions examples/plot_2D_simulation_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@
dl.fit_importance(X_init, y)

# compute estimated support (first method)
selected_dl = (dl.pvalues_ < (fwer_target / 2) / n_features) | (
dl.one_minus_pvalues_ < (fwer_target / 2) / n_features
)
selected_dl = dl.fwer_selection(fwer=fwer_target, n_tests=n_features)

tp_mask = ((selected_dl.astype(int) == 1) & (beta == 1)).astype(bool)
fp_mask = ((selected_dl.astype(int) == 1) & (beta == 0)).astype(bool)
Expand Down Expand Up @@ -151,9 +149,7 @@
clu_dl = CluDL(desparsified_lasso=dl_2, clustering=clustering, random_state=0)
clu_dl.fit_importance(X_init, y)

selected_cdl = (clu_dl.pvalues_ < (fwer_target / 2) / n_clusters) | (
clu_dl.one_minus_pvalues_ < (fwer_target / 2) / n_clusters
)
selected_cdl = clu_dl.fwer_selection(fwer=fwer_target, n_tests=n_clusters)


# %%
Expand Down Expand Up @@ -211,9 +207,7 @@
)
enclu_dl.fit_importance(X_init, y)

selected_ecdl = (enclu_dl.pvalues_ < (fwer_target / 2) / n_clusters) | (
enclu_dl.one_minus_pvalues_ < (fwer_target / 2) / n_clusters
)
selected_ecdl = enclu_dl.fwer_selection(fwer=fwer_target, n_tests=n_clusters)


# %%
Expand Down
28 changes: 14 additions & 14 deletions examples/plot_digits.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,17 +130,19 @@
)

encludl.fit_importance(X_4_7, y_4_7)
selected_pos_4_7 = encludl.pvalues_ < fwer / 2 / n_clusters
selected_neg_4_7 = encludl.one_minus_pvalues_ < fwer / 2 / n_clusters
selected_4_7 = encludl.fwer_selection(
fwer=fwer, n_tests=n_clusters, two_tailed_test=True
)

encludl.fit_importance(X_0_1, y_0_1)
selected_pos_0_1 = encludl.pvalues_ < fwer / 2 / n_clusters
selected_neg_0_1 = encludl.one_minus_pvalues_ < fwer / 2 / n_clusters
selected_0_1 = encludl.fwer_selection(
fwer=fwer, n_tests=n_clusters, two_tailed_test=True
)

encludl.fit_importance(X_0_9, y_0_9)
selected_pos_0_9 = encludl.pvalues_ < fwer / 2 / n_clusters
selected_neg_0_9 = encludl.one_minus_pvalues_ < fwer / 2 / n_clusters

selected_0_9 = encludl.fwer_selection(
fwer=fwer, n_tests=n_clusters, two_tailed_test=True
)

# %%
# Visualizing the results
Expand All @@ -153,16 +155,14 @@

_, axes = plt.subplots(1, 3, figsize=(5, 2), subplot_kw={"xticks": [], "yticks": []})

for i, (title, selected_pos, selected_neg) in enumerate(
for i, (title, selected) in enumerate(
[
("4 vs 7", selected_pos_4_7, selected_neg_4_7),
("0 vs 1", selected_pos_0_1, selected_neg_0_1),
("0 vs 9", selected_pos_0_9, selected_neg_0_9),
("4 vs 7", selected_4_7),
("0 vs 1", selected_0_1),
("0 vs 9", selected_0_9),
]
):
mask_encludl = np.zeros(shape)
mask_encludl[selected_pos.reshape(shape)] = 1
mask_encludl[selected_neg.reshape(shape)] = -1
mask_encludl = selected.reshape(shape)

cmap = ListedColormap(["tab:red", "white", "tab:blue"])
axes[i].imshow(mask_encludl, cmap=cmap, vmin=-1, vmax=1)
Expand Down
47 changes: 16 additions & 31 deletions examples/plot_fmri_data_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ def preprocess_haxby(subject=2, memory=None):
# Making the inference with several algorithms
# --------------------------------------------


from sklearn.linear_model import LassoCV
from sklearn.model_selection import KFold

Expand Down Expand Up @@ -173,6 +172,7 @@ def preprocess_haxby(subject=2, memory=None):
)
cludl.fit_importance(X, y)


# %%
# Below, we run the ensemble clustered inference algorithm which adds a
# randomization step over the clustered inference algorithm (c.f. References).
Expand All @@ -182,7 +182,6 @@ def preprocess_haxby(subject=2, memory=None):
# However you might benefit from clustering randomization taking
# `n_bootstraps=25` or `n_bootstraps=100`, also we set `n_jobs=n_jobs`.


from hidimstat.ensemble_clustered_inference import EnCluDL

encludl = EnCluDL(
Expand All @@ -195,6 +194,7 @@ def preprocess_haxby(subject=2, memory=None):
)
encludl.fit_importance(X, y)


# %%
# Plotting the results
# --------------------
Expand All @@ -210,27 +210,6 @@ def preprocess_haxby(subject=2, memory=None):
n_samples, n_features = X.shape
target_fwer = 0.1

# %%
# We now translate the FWER target into a z-score target.
# For the permutation test methods we do not need any additional correction
# since the p-values are already adjusted for multiple testing.

zscore_threshold_corr = zscore_from_pval((target_fwer / 2))

# %%
# Other methods need to be corrected. We consider the Bonferroni correction.
# For methods that do not reduce the feature space, the correction
# consists in dividing by the number of features.

correction = 1.0 / n_features
zscore_threshold_no_clust = zscore_from_pval((target_fwer / 2) * correction)

# %%
# For methods that parcelates the brain into groups of voxels, the correction
# consists in dividing by the number of parcels (or clusters).

correction_clust = 1.0 / n_clusters
zscore_threshold_clust = zscore_from_pval((target_fwer / 2) * correction_clust)

# %%
# Now, we can plot the thresholded z-score maps by translating the
Expand All @@ -241,7 +220,8 @@ def preprocess_haxby(subject=2, memory=None):

from matplotlib.pyplot import get_cmap
from nilearn.plotting import plot_stat_map, show
from sklearn.preprocessing import StandardScaler

from hidimstat.statistical_tools.p_values import zscore_from_pval


def plot_map(
Expand All @@ -268,20 +248,25 @@ def plot_map(
)


selected_cludl = cludl.fwer_selection(fwer=target_fwer, two_tailed_test=True)
plot_map(
zscore_from_pval(cludl.pvalues_, cludl.one_minus_pvalues_),
float(zscore_threshold_clust),
"CluDL",
zscore_from_pval(cludl.pvalues_) * selected_cludl,
float(zscore_from_pval(target_fwer / 2 / n_clusters)),
title="CluDL",
)

selected = encludl.pvalues_ < target_fwer / 2 / n_clusters
selected = selected.astype(int)
selected[(encludl.one_minus_pvalues_ < target_fwer / 2 / n_clusters)] = -1
plot_map(selected, 0.5, "EnCluDL", vmin=-1, vmax=1)
selected_encludl = encludl.fwer_selection(fwer=target_fwer, two_tailed_test=True)
z_score_encludl = zscore_from_pval(encludl.pvalues_) * selected_encludl
plot_map(
z_score_encludl,
float(zscore_from_pval(target_fwer / 2 / n_clusters)),
"EnCluDL",
)
# Finally, calling plotting.show() is necessary to display the figure when
# running as a script outside IPython
show()


# %%
# Analysis of the results
# -----------------------
Expand Down
127 changes: 86 additions & 41 deletions src/hidimstat/base_variable_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from hidimstat._utils.exception import InternalError
from hidimstat.statistical_tools.multiple_testing import fdr_threshold
from hidimstat.statistical_tools.p_values import pval_from_two_sided_pval_and_sign


def _selection_generic(
Expand Down Expand Up @@ -114,9 +115,6 @@ class BaseVariableImportance(BaseEstimator):
The computed importance scores for each feature.
pvalues_ : array-like of shape (n_features,), default=None
The computed p-values for each feature.
one_minus_pvalues_: ndarray of shape (n_features)
One minus the corrected p-value, with numerically accurate values for negative
effects (ie., for p-value close to one).

Methods
-------
Expand All @@ -131,7 +129,6 @@ def __init__(self):
super().__init__()
self.importances_ = None
self.pvalues_ = None
self.one_minus_pvalues_ = None

def _check_importance(self):
"""
Expand Down Expand Up @@ -230,7 +227,8 @@ def fdr_selection(
fdr,
fdr_control="bhq",
reshaping_function=None,
alternative_hypothesis=False,
two_tailed_test=False,
eps=1e-14,
):
"""
Performs feature selection based on False Discovery Rate (FDR) control.
Expand All @@ -246,16 +244,20 @@ def fdr_selection(
reshaping_function: callable or None, default=None
Optional reshaping function for FDR control methods.
If None, defaults to sum of reciprocals for 'bhy'.
alternative_hippothesis: bool or None, default=False
If False, selects features with small p-values.
If True, selects features with large p-values (close to 1).
If None, selects features that have either small or large p-values.
two_tailed_test: bool, default=False
If True, performs two-tailed test selection using both p-values
for positive effects and one-minus p-values for negative effects. The sign
of the effect is determined from the sign of the importance scores.
eps : float, default=1e-14
Small value to ensure numerical stability when computing one-minus p-values.

Returns
-------
selected : ndarray of bool
Boolean mask of selected features.
True indicates selected features, False indicates non-selected features.
selected : ndarray of int
Integer array indicating the selected features.
1 indicates selected features with positive effects,
-1 indicates selected features with negative effects,
0 indicates non-selected features.

Raises
------
Expand All @@ -272,39 +274,82 @@ def fdr_selection(
assert (
fdr_control == "bhq" or fdr_control == "bhy"
), "only 'bhq' and 'bhy' are supported"
assert alternative_hypothesis is None or isinstance(
alternative_hypothesis, bool
), "alternative_hippothesis can have only three values: True, False and None."

# selection on pvalue
if alternative_hypothesis is None or not alternative_hypothesis:
threshold_pvalues = fdr_threshold(
self.pvalues_,
fdr=fdr,
method=fdr_control,
reshaping_function=reshaping_function,
)
selected_pvalues = self.pvalues_ <= threshold_pvalues
else:
selected_pvalues = np.zeros_like(self.pvalues_, dtype=bool)

# selection on 1-pvalue
if alternative_hypothesis is None or alternative_hypothesis:
threshold_one_minus_pvalues = fdr_threshold(
self.one_minus_pvalues_,
fdr=fdr,
method=fdr_control,
reshaping_function=reshaping_function,
)
selected_one_minus_pvalues = (
self.one_minus_pvalues_
) <= threshold_one_minus_pvalues
else:
selected_one_minus_pvalues = np.zeros_like(self.pvalues_, dtype=bool)
# Adjust fdr for two-tailed test
if two_tailed_test:
fdr = fdr / 2

threshold_pvalues = fdr_threshold(
self.pvalues_,
fdr=fdr,
method=fdr_control,
reshaping_function=reshaping_function,
)
selected = (self.pvalues_ <= threshold_pvalues).astype(int)

# For two-tailed test, determine the sign of the effect
if two_tailed_test:
if self.importances_.ndim > 1:
sign_beta = np.sign(self.importances_.sum(axis=1))
else:
sign_beta = np.sign(self.importances_)
selected = selected * sign_beta

selected = selected_pvalues | selected_one_minus_pvalues
return selected

def fwer_selection(
self, fwer, procedure="bonferroni", n_tests=None, two_tailed_test=False
):
"""
Performs feature selection based on Family-Wise Error Rate (FWER) control.

Parameters
----------
fwer : float
The target family-wise error rate level (between 0 and 1)
procedure : {'bonferroni'}, default='bonferroni'
The FWER control method to use:
- 'bonferroni': Bonferroni correction
n_tests : int or None, default=None
Factor for multiple testing correction. If None, uses the number of clusters
or the number of features in this order.
two_tailed_test : bool, default=False
If True, uses the sign of the importance scores to indicate whether the
selected features have positive or negative effects.

Returns
-------
selected : ndarray of int
Integer array indicating the selected features.
1 indicates selected features with positive effects,
-1 indicates selected features with negative effects,
0 indicates non-selected features.
"""
self._check_importance()

if procedure == "bonferroni":
if n_tests is None:
if hasattr(self, "clustering_"):
print("Using number of clusters for multiple testing correction.")
n_tests = self.clustering_.n_clusters_
else:
print("Using number of features for multiple testing correction.")
n_tests = self.importances_.shape[0]

# Adjust fwer for two-tailed test
if two_tailed_test:
fwer = fwer / 2

threshold_pvalue = fwer / n_tests
selected = (self.pvalues_ < threshold_pvalue).astype(int)
if two_tailed_test:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here we should set threshold_pvalue to fwer / (2 * n_tests)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thx. Can you remove the division by 2 from examples ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

sign_beta = np.sign(self.importances_)
selected = selected * sign_beta
return selected

else:
raise ValueError("Only 'bonferroni' procedure is supported")

def plot_importance(
self,
ax=None,
Expand Down
Loading