Skip to content
This repository was archived by the owner on Oct 21, 2025. It is now read-only.

Commit 516489f

Browse files
all unit tests are working
fixed remaining interfaces to batchglm
1 parent 97340f7 commit 516489f

13 files changed

+470
-468
lines changed

diffxpy/testing/base.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ def design_loc(self) -> np.ndarray:
5353
def design_scale(self) -> np.ndarray:
5454
pass
5555

56+
@property
57+
@abc.abstractmethod
58+
def constraints_loc(self) -> np.ndarray:
59+
pass
60+
61+
@property
62+
@abc.abstractmethod
63+
def constraints_scale(self) -> np.ndarray:
64+
pass
65+
5666
@property
5767
@abc.abstractmethod
5868
def num_observations(self) -> int:
@@ -73,12 +83,9 @@ def features(self) -> np.ndarray:
7383
def observations(self) -> np.ndarray:
7484
pass
7585

86+
@property
7687
@abc.abstractmethod
77-
def probs(self) -> np.ndarray:
78-
pass
79-
80-
@abc.abstractmethod
81-
def log_probs(self) -> np.ndarray:
88+
def log_likelihood(self, **kwargs) -> np.ndarray:
8289
pass
8390

8491
@property
@@ -88,7 +95,7 @@ def loss(self, **kwargs) -> np.ndarray:
8895

8996
@property
9097
@abc.abstractmethod
91-
def gradient(self, **kwargs) -> np.ndarray:
98+
def gradients(self, **kwargs) -> np.ndarray:
9299
pass
93100

94101
@property
@@ -410,24 +417,21 @@ def X(self):
410417

411418
@property
412419
def reduced_model_gradient(self):
413-
return self.reduced_estim.gradient
420+
return self.reduced_estim.gradients
414421

415422
@property
416423
def full_model_gradient(self):
417-
return self.full_estim.gradient
424+
return self.full_estim.gradients
418425

419426
def _test(self):
420-
full = np.sum(self.full_estim.log_probs(), axis=0)
421-
reduced = np.sum(self.reduced_estim.log_probs(), axis=0)
422-
423-
if np.any(full < reduced):
424-
logger.warning("Test assumption failed: full model is (partially) less probable than reduced model!")
427+
if np.any(self.full_estim.log_likelihood < self.reduced_estim.log_likelihood):
428+
logger.warning("Test assumption failed: full model is (partially) less probable than reduced model")
425429

426430
return stats.likelihood_ratio_test(
427-
ll_full=full,
428-
ll_reduced=reduced,
429-
df_full=self.full_estim.design_loc.shape[-1] + self.full_estim.design_scale.shape[-1],
430-
df_reduced=self.reduced_estim.design_loc.shape[-1] + self.reduced_estim.design_scale.shape[-1],
431+
ll_full=self.full_estim.log_likelihood,
432+
ll_reduced=self.reduced_estim.log_likelihood,
433+
df_full=self.full_estim.constraints_loc.shape[1] + self.full_estim.constraints_scale.shape[1],
434+
df_reduced=self.reduced_estim.constraints_loc.shape[1] + self.reduced_estim.constraints_scale.shape[1],
431435
)
432436

433437
def _ave(self):
@@ -643,7 +647,7 @@ def X(self):
643647

644648
@property
645649
def model_gradient(self):
646-
return self.model_estim.gradient
650+
return self.model_estim.gradients
647651

648652
def log_fold_change(self, base=np.e, **kwargs):
649653
"""
@@ -1244,7 +1248,7 @@ def log_probs(self):
12441248

12451249
@property
12461250
def model_gradient(self):
1247-
return self.model_estim.gradient
1251+
return self.model_estim.gradients
12481252

12491253
def _ave(self):
12501254
"""
@@ -1449,7 +1453,7 @@ def log_probs(self):
14491453

14501454
@property
14511455
def model_gradient(self):
1452-
return self.model_estim.gradient
1456+
return self.model_estim.gradients
14531457

14541458
def _ave(self):
14551459
"""
@@ -2611,6 +2615,9 @@ def lrt(
26112615
"""
26122616
Perform log-likelihood ratio test for differential expression for each gene.
26132617
2618+
Note that lrt() does not support constraints in its current form. Please
2619+
use wald() for constraints.
2620+
26142621
:param data: input data
26152622
:param reduced_formula: formula
26162623
Reduced model formula for location and scale parameter models.
@@ -2734,6 +2741,8 @@ def lrt(
27342741
data=X,
27352742
design_loc=reduced_design_loc,
27362743
design_scale=reduced_design_scale,
2744+
constraints_loc=None,
2745+
constraints_scale=None,
27372746
init_a=init_a,
27382747
init_b=init_b,
27392748
as_numeric=as_numeric,
@@ -2750,6 +2759,8 @@ def lrt(
27502759
data=X,
27512760
design_loc=full_design_loc,
27522761
design_scale=full_design_scale,
2762+
constraints_loc=None,
2763+
constraints_scale=None,
27532764
gene_names=gene_names,
27542765
init_a="init_model",
27552766
init_b="init_model",
@@ -4035,7 +4046,7 @@ def continuous_1d(
40354046
noise_model: str = 'nb',
40364047
size_factors: np.ndarray = None,
40374048
batch_size: int = None,
4038-
training_strategy: Union[str, List[Dict[str, object]], Callable] = "CONTINUOUS",
4049+
training_strategy: Union[str, List[Dict[str, object]], Callable] = "DEFAULT",
40394050
quick_scale: bool = None,
40404051
dtype="float64",
40414052
**kwargs

diffxpy/testing/continuous.py

Whitespace-only changes.

diffxpy/unit_test/test_constrained.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,10 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100):
7575
# Compare p-value distribution under null model against uniform distribution.
7676
pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
7777

78-
print('KS-test pvalue for null model match of wald(): %f' % pval_h0)
79-
78+
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
8079
assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
8180

82-
return pval_h0
81+
return True
8382

8483
def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
8584
"""
@@ -162,11 +161,10 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
162161
# Compare p-value distribution under null model against uniform distribution.
163162
pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
164163

165-
print('KS-test pvalue for null model match of wald(): %f' % pval_h0)
166-
164+
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
167165
assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
168166

169-
return pval_h0
167+
return True
170168

171169
def test_null_distribution_wald_multi_constrained_2layer(self, n_genes: int = 50):
172170
"""
@@ -181,8 +179,8 @@ def test_null_distribution_wald_multi_constrained_2layer(self, n_genes: int = 50
181179
:param n_genes: Number of genes to simulate (number of tests).
182180
"""
183181
logging.getLogger("tensorflow").setLevel(logging.ERROR)
184-
logging.getLogger("batchglm").setLevel(logging.INFO)
185-
logging.getLogger("diffxpy").setLevel(logging.INFO)
182+
logging.getLogger("batchglm").setLevel(logging.WARNING)
183+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
186184

187185
n_cells = 3000
188186

@@ -239,11 +237,10 @@ def test_null_distribution_wald_multi_constrained_2layer(self, n_genes: int = 50
239237
# Compare p-value distribution under null model against uniform distribution.
240238
pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
241239

242-
print('KS-test pvalue for null model match of wald(): %f' % pval_h0)
243-
240+
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
244241
assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
245242

246-
return pval_h0
243+
return True
247244

248245

249246
if __name__ == '__main__':

diffxpy/unit_test/test_continuous.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@
33
import numpy as np
44
import pandas as pd
55
import scipy.stats as stats
6-
import scipy.sparse
7-
import anndata
86
import logging
97

10-
from batchglm.api.models.glm_nb import Simulator, Estimator, InputData
8+
from batchglm.api.models.glm_nb import Simulator
119
import diffxpy.api as de
1210

1311

@@ -20,7 +18,9 @@ def test_forfatal_functions(self):
2018
:param n_cells: Number of cells to simulate (number of observations per test).
2119
:param n_genes: Number of genes to simulate (number of tests).
2220
"""
23-
logging.getLogger('diffxpy').addFilter('DEBUG')
21+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
22+
logging.getLogger("batchglm").setLevel(logging.WARNING)
23+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
2424

2525
num_observations = 10
2626
num_features = 2
@@ -45,6 +45,7 @@ def test_forfatal_functions(self):
4545
sample_description=random_sample_description,
4646
quick_scale=True,
4747
batch_size=None,
48+
training_strategy="DEFAULT",
4849
dtype="float64"
4950
)
5051

@@ -67,8 +68,9 @@ def test_forfatal_functions(self):
6768
temp = test.argmin(genes=ids, nonnumeric=True)
6869
temp = test.summary(nonnumeric=True)
6970

71+
return True
7072

71-
def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 500):
73+
def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100):
7274
"""
7375
Test if de.test.continuous() generates a uniform p-value distribution in the wald test
7476
if it is given data simulated based on the null model. Returns the p-value
@@ -78,7 +80,9 @@ def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 500):
7880
:param n_cells: Number of cells to simulate (number of observations per test).
7981
:param n_genes: Number of genes to simulate (number of tests).
8082
"""
81-
logging.getLogger('diffxpy').addFilter('DEBUG')
83+
logging.getLogger("tensorflow").setLevel(logging.INFO)
84+
logging.getLogger("batchglm").setLevel(logging.INFO)
85+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
8286

8387
sim = Simulator(num_observations=n_cells, num_features=n_genes)
8488
sim.generate_sample_description(num_batches=0, num_conditions=0)
@@ -99,20 +103,20 @@ def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 500):
99103
sample_description=random_sample_description,
100104
quick_scale=True,
101105
batch_size=None,
106+
training_strategy="DEFAULT",
102107
dtype="float64"
103108
)
104109
summary = test.summary()
105110

106111
# Compare p-value distribution under null model against uniform distribution.
107112
pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
108113

109-
print('KS-test pvalue for null model match of wald(): %f' % pval_h0)
110-
114+
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
111115
assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
112116

113-
return pval_h0
117+
return True
114118

115-
def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 500):
119+
def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100):
116120
"""
117121
Test if de.test.continuous() generates a uniform p-value distribution in lrt
118122
if it is given data simulated based on the null model. Returns the p-value
@@ -122,7 +126,9 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 500):
122126
:param n_cells: Number of cells to simulate (number of observations per test).
123127
:param n_genes: Number of genes to simulate (number of tests).
124128
"""
125-
logging.getLogger('diffxpy').addFilter('DEBUG')
129+
logging.getLogger("tensorflow").setLevel(logging.INFO)
130+
logging.getLogger("batchglm").setLevel(logging.INFO)
131+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
126132

127133
sim = Simulator(num_observations=n_cells, num_features=n_genes)
128134
sim.generate_sample_description(num_batches=0, num_conditions=0)
@@ -143,18 +149,18 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 500):
143149
sample_description=random_sample_description,
144150
quick_scale=False,
145151
batch_size=None,
152+
training_strategy="DEFAULT",
146153
dtype="float64"
147154
)
148155
summary = test.summary()
149156

150157
# Compare p-value distribution under null model against uniform distribution.
151158
pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
152159

153-
print('KS-test pvalue for null model match of wald(): %f' % pval_h0)
154-
160+
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
155161
assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
156162

157-
return pval_h0
163+
return True
158164

159165
if __name__ == '__main__':
160166
unittest.main()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import unittest
2+
import numpy as np
3+
import pandas as pd
4+
import scipy.stats as stats
5+
import logging
6+
7+
from batchglm.api.models.glm_nb import Simulator, Estimator, InputData
8+
import diffxpy.api as de
9+
10+
11+
12+
class TestCorrection(unittest.TestCase):
13+
pass
14+
15+
if __name__ == '__main__':
16+
unittest.main()
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import unittest
2+
import logging
3+
4+
import numpy as np
5+
import pandas as pd
6+
import scipy.stats as stats
7+
import scipy.sparse
8+
import anndata
9+
10+
from batchglm.api.models.glm_nb import Simulator
11+
import diffxpy.api as de
12+
13+
14+
class TestDataTypes(unittest.TestCase):
15+
16+
def test_sparse_anndata(self, n_cells: int = 2000, n_genes: int = 100):
17+
"""
18+
Test if de.wald() generates a uniform p-value distribution
19+
if it is given data simulated based on the null model. Returns the p-value
20+
of the two-sided Kolmogorov-Smirnov test for equality of the observed
21+
p-value distribution and a uniform distribution.
22+
23+
:param n_cells: Number of cells to simulate (number of observations per test).
24+
:param n_genes: Number of genes to simulate (number of tests).
25+
"""
26+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
27+
logging.getLogger("batchglm").setLevel(logging.WARNING)
28+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
29+
30+
sim = Simulator(num_observations=n_cells, num_features=n_genes)
31+
sim.generate_sample_description(num_batches=0, num_conditions=0)
32+
sim.generate()
33+
34+
random_sample_description = pd.DataFrame({
35+
"condition": np.random.randint(2, size=sim.num_observations)
36+
})
37+
38+
adata = anndata.AnnData(scipy.sparse.csr_matrix(sim.X.values))
39+
# X = adata.X
40+
test = de.test.wald(
41+
data=adata,
42+
factor_loc_totest="condition",
43+
formula="~ 1 + condition",
44+
sample_description=random_sample_description,
45+
quick_scale=True,
46+
training_strategy="DEFAULT",
47+
dtype="float64"
48+
)
49+
summary = test.summary()
50+
51+
# Compare p-value distribution under null model against uniform distribution.
52+
pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
53+
54+
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
55+
assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
56+
57+
return True
58+
59+
60+
if __name__ == '__main__':
61+
unittest.main()

0 commit comments

Comments
 (0)