-
Notifications
You must be signed in to change notification settings - Fork 12
[CORE] remove one_minus_pval attribute, add fwer_selection
#541
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
18f3a1c
e61ff31
f130a5c
336ceae
1d2b45e
c5ef974
1b2ce1a
a37d5ec
44fba3e
093adc6
2bb505b
0c8bd90
9cefa18
541faa0
23cf646
0ed2d7d
56d6e7c
3249423
68376c1
43a4fe9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
|
|
||
| from hidimstat._utils.exception import InternalError | ||
| from hidimstat.statistical_tools.multiple_testing import fdr_threshold | ||
| from hidimstat.statistical_tools.p_values import pval_from_two_sided_pval_and_sign | ||
|
|
||
|
|
||
| def _selection_generic( | ||
|
|
@@ -114,9 +115,6 @@ class BaseVariableImportance(BaseEstimator): | |
| The computed importance scores for each feature. | ||
| pvalues_ : array-like of shape (n_features,), default=None | ||
| The computed p-values for each feature. | ||
| one_minus_pvalues_: ndarray of shape (n_features) | ||
| One minus the corrected p-value, with numerically accurate values for negative | ||
| effects (ie., for p-value close to one). | ||
|
|
||
| Methods | ||
| ------- | ||
|
|
@@ -131,7 +129,6 @@ def __init__(self): | |
| super().__init__() | ||
| self.importances_ = None | ||
| self.pvalues_ = None | ||
| self.one_minus_pvalues_ = None | ||
|
|
||
| def _check_importance(self): | ||
| """ | ||
|
|
@@ -230,7 +227,8 @@ def fdr_selection( | |
| fdr, | ||
| fdr_control="bhq", | ||
| reshaping_function=None, | ||
| alternative_hypothesis=False, | ||
| two_tailed_test=False, | ||
| eps=1e-14, | ||
| ): | ||
| """ | ||
| Performs feature selection based on False Discovery Rate (FDR) control. | ||
|
|
@@ -246,16 +244,20 @@ def fdr_selection( | |
| reshaping_function: callable or None, default=None | ||
| Optional reshaping function for FDR control methods. | ||
| If None, defaults to sum of reciprocals for 'bhy'. | ||
| alternative_hypothesis: bool or None, default=False | ||
| If False, selects features with small p-values. | ||
| If True, selects features with large p-values (close to 1). | ||
| If None, selects features that have either small or large p-values. | ||
| two_tailed_test: bool, default=False | ||
| If True, performs two-tailed test selection using both p-values | ||
| for positive effects and one-minus p-values for negative effects. The sign | ||
| of the effect is determined from the sign of the importance scores. | ||
| eps : float, default=1e-14 | ||
| Small value to ensure numerical stability when computing one-minus p-values. | ||
|
|
||
| Returns | ||
| ------- | ||
| selected : ndarray of bool | ||
| Boolean mask of selected features. | ||
| True indicates selected features, False indicates non-selected features. | ||
| selected : ndarray of int | ||
| Integer array indicating the selected features. | ||
| 1 indicates selected features with positive effects, | ||
| -1 indicates selected features with negative effects, | ||
| 0 indicates non-selected features. | ||
|
|
||
| Raises | ||
| ------ | ||
|
|
@@ -272,39 +274,82 @@ def fdr_selection( | |
| assert ( | ||
| fdr_control == "bhq" or fdr_control == "bhy" | ||
| ), "only 'bhq' and 'bhy' are supported" | ||
| assert alternative_hypothesis is None or isinstance( | ||
| alternative_hypothesis, bool | ||
| ), "alternative_hippothesis can have only three values: True, False and None." | ||
|
|
||
| # selection on pvalue | ||
| if alternative_hypothesis is None or not alternative_hypothesis: | ||
| threshold_pvalues = fdr_threshold( | ||
| self.pvalues_, | ||
| fdr=fdr, | ||
| method=fdr_control, | ||
| reshaping_function=reshaping_function, | ||
| ) | ||
| selected_pvalues = self.pvalues_ <= threshold_pvalues | ||
| else: | ||
| selected_pvalues = np.zeros_like(self.pvalues_, dtype=bool) | ||
|
|
||
| # selection on 1-pvalue | ||
| if alternative_hypothesis is None or alternative_hypothesis: | ||
| threshold_one_minus_pvalues = fdr_threshold( | ||
| self.one_minus_pvalues_, | ||
| fdr=fdr, | ||
| method=fdr_control, | ||
| reshaping_function=reshaping_function, | ||
| ) | ||
| selected_one_minus_pvalues = ( | ||
| self.one_minus_pvalues_ | ||
| ) <= threshold_one_minus_pvalues | ||
| else: | ||
| selected_one_minus_pvalues = np.zeros_like(self.pvalues_, dtype=bool) | ||
| # Adjust fdr for two-tailed test | ||
| if two_tailed_test: | ||
| fdr = fdr / 2 | ||
|
|
||
| threshold_pvalues = fdr_threshold( | ||
| self.pvalues_, | ||
| fdr=fdr, | ||
| method=fdr_control, | ||
| reshaping_function=reshaping_function, | ||
| ) | ||
| selected = (self.pvalues_ <= threshold_pvalues).astype(int) | ||
|
|
||
| # For two-tailed test, determine the sign of the effect | ||
| if two_tailed_test: | ||
| if self.importances_.ndim > 1: | ||
| sign_beta = np.sign(self.importances_.sum(axis=1)) | ||
| else: | ||
| sign_beta = np.sign(self.importances_) | ||
| selected = selected * sign_beta | ||
|
|
||
| selected = selected_pvalues | selected_one_minus_pvalues | ||
| return selected | ||
|
|
||
| def fwer_selection( | ||
| self, fwer, procedure="bonferroni", n_tests=None, two_tailed_test=False | ||
| ): | ||
| """ | ||
| Performs feature selection based on Family-Wise Error Rate (FWER) control. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| fwer : float | ||
| The target family-wise error rate level (between 0 and 1) | ||
| procedure : {'bonferroni'}, default='bonferroni' | ||
| The FWER control method to use: | ||
| - 'bonferroni': Bonferroni correction | ||
| n_tests : int or None, default=None | ||
| Factor for multiple testing correction. If None, uses the number of clusters | ||
| or the number of features in this order. | ||
| two_tailed_test : bool, default=False | ||
| If True, uses the sign of the importance scores to indicate whether the | ||
| selected features have positive or negative effects. | ||
|
|
||
| Returns | ||
| ------- | ||
| selected : ndarray of int | ||
| Integer array indicating the selected features. | ||
| 1 indicates selected features with positive effects, | ||
| -1 indicates selected features with negative effects, | ||
| 0 indicates non-selected features. | ||
| """ | ||
| self._check_importance() | ||
|
|
||
| if procedure == "bonferroni": | ||
| if n_tests is None: | ||
| if hasattr(self, "clustering_"): | ||
| print("Using number of clusters for multiple testing correction.") | ||
| n_tests = self.clustering_.n_clusters_ | ||
| else: | ||
| print("Using number of features for multiple testing correction.") | ||
| n_tests = self.importances_.shape[0] | ||
|
|
||
| # Adjust fwer for two-tailed test | ||
| if two_tailed_test: | ||
| fwer = fwer / 2 | ||
|
|
||
| threshold_pvalue = fwer / n_tests | ||
| selected = (self.pvalues_ < threshold_pvalue).astype(int) | ||
| if two_tailed_test: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: here we should set threshold_pvalue to fwer / (2 * n_tests).
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: done.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Thanks. Can you also remove the division by 2 from the examples?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: done. |
||
| sign_beta = np.sign(self.importances_) | ||
| selected = selected * sign_beta | ||
| return selected | ||
|
|
||
| else: | ||
| raise ValueError("Only 'bonferroni' procedure is supported") | ||
|
|
||
| def plot_importance( | ||
| self, | ||
| ax=None, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.