Skip to content
This repository was archived by the owner on Oct 21, 2025. It is now read-only.

Commit bc271bb

Browse files
improved numerical stability of ztests
by catching division by zero if stdev is zero.
1 parent 8d9874c commit bc271bb

File tree

3 files changed

+22
-14
lines changed

3 files changed

+22
-14
lines changed

diffxpy/stats/stats.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def wald_test(
211211
if theta_mle.shape[0] != theta0.shape[0]:
212212
raise ValueError('stats.wald_test(): theta_mle and theta0 have to contain the same number of entries')
213213

214+
theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
214215
wald_statistic = np.abs(np.divide(theta_mle - theta0, theta_sd))
215216
pvals = 2 * (1 - scipy.stats.norm(loc=0, scale=1).cdf(wald_statistic)) # two-tailed test
216217
return pvals
@@ -313,7 +314,10 @@ def two_coef_z_test(
313314
if theta_mle0.shape[0] != theta_sd0.shape[0]:
314315
raise ValueError('stats.two_coef_z_test(): theta_mle0 and theta_sd0 have to contain the same number of entries')
315316

316-
z_statistic = np.abs((theta_mle0 - theta_mle1) / np.sqrt(np.square(theta_sd0) + np.square(theta_sd1)))
317+
divisor = np.square(theta_sd0) + np.square(theta_sd1)
318+
divisor = np.nextafter(0, np.inf, out=divisor, where=divisor < np.nextafter(0, np.inf))
319+
divisor = np.sqrt(divisor)
320+
z_statistic = np.abs((theta_mle0 - theta_mle1)) / divisor
317321
pvals = 2 * (1 - scipy.stats.norm(loc=0, scale=1).cdf(z_statistic)) # two-tailed test
318322
return pvals
319323

diffxpy/testing/det.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2102,11 +2102,13 @@ def __init__(
21022102
self.grouping = grouping
21032103
self.groups = list(np.asarray(groups))
21042104

2105-
# values of parameter estimates: coefficients x genes array with one coefficient per group
2105+
# Values of parameter estimates: coefficients x genes array with one coefficient per group
21062106
self._theta_mle = model_estim.a_var
2107-
# standard deviation of estimates: coefficients x genes array with one coefficient per group
2108-
# theta_sd = sqrt(diagonal(fisher_inv))
2109-
self._theta_sd = np.sqrt(np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1)).T
2107+
# Standard deviation of estimates: coefficients x genes array with one coefficient per group
2108+
# Need .copy() here as nextafter needs mutabls copy.
2109+
theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
2110+
theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
2111+
self._theta_sd = np.sqrt(theta_sd)
21102112
self._logfc = None
21112113

21122114
# Call tests in constructor.
@@ -2307,11 +2309,13 @@ def __init__(
23072309
else:
23082310
self.groups = groups.tolist()
23092311

2310-
# values of parameter estimates: coefficients x genes array with one coefficient per group
2312+
# Values of parameter estimates: coefficients x genes array with one coefficient per group
23112313
self._theta_mle = model_estim.a_var
2312-
# standard deviation of estimates: coefficients x genes array with one coefficient per group
2313-
# theta_sd = sqrt(diagonal(fisher_inv))
2314-
self._theta_sd = np.sqrt(np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1)).T
2314+
# Standard deviation of estimates: coefficients x genes array with one coefficient per group
2315+
# Need .copy() here as nextafter needs mutabls copy.
2316+
theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
2317+
theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
2318+
self._theta_sd = np.sqrt(theta_sd)
23152319

23162320
def _correction(self, pvals, method="fdr_bh") -> np.ndarray:
23172321
"""

diffxpy/testing/tests.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -819,8 +819,8 @@ def two_sample(
819819
:param kwargs: [Debugging] Additional arguments will be passed to the _fit method.
820820
"""
821821
if test in ['t-test', 'rank'] and noise_model is not None:
822-
raise ValueError('base.two_sample(): Do not specify `noise_model` if using test t-test or rank_test: ' +
823-
'The t-test is based on a gaussian noise model and wilcoxon is model free.')
822+
raise Warning('two_sample(): Do not specify `noise_model` if using test t-test or rank_test: ' +
823+
'The t-test is based on a gaussian noise model and the rank sum test is model free.')
824824

825825
gene_names = parse_gene_names(data, gene_names)
826826
grouping = parse_grouping(data, sample_description, grouping)
@@ -906,11 +906,11 @@ def pairwise(
906906
data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix, glm.typing.InputDataBase],
907907
grouping: Union[str, np.ndarray, list],
908908
as_numeric: Union[List[str], Tuple[str], str] = (),
909-
test: str = 'z-test',
910-
lazy: bool = False,
909+
test: str = "z-test",
910+
lazy: bool = True,
911911
gene_names: Union[np.ndarray, list] = None,
912912
sample_description: pd.DataFrame = None,
913-
noise_model: str = None,
913+
noise_model: str = "nb",
914914
size_factors: np.ndarray = None,
915915
batch_size: int = None,
916916
training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",

0 commit comments

Comments
 (0)