Skip to content
This repository was archived by the owner on Oct 21, 2025. It is now read-only.

Commit 8d9874c

Browse files
fixed pairwise unit tests for null model
1 parent 5019c98 commit 8d9874c

File tree

2 files changed

+95
-207
lines changed

2 files changed

+95
-207
lines changed

diffxpy/testing/tests.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,8 @@ def two_sample(
848848
sample_description=sample_description,
849849
noise_model=noise_model,
850850
size_factors=size_factors,
851+
init_a="closed_form",
852+
init_b="closed_form",
851853
batch_size=batch_size,
852854
training_strategy=training_strategy,
853855
quick_scale=quick_scale,
@@ -872,6 +874,8 @@ def two_sample(
872874
sample_description=sample_description,
873875
noise_model=noise_model,
874876
size_factors=size_factors,
877+
init_a="closed_form",
878+
init_b="closed_form",
875879
batch_size=batch_size,
876880
training_strategy=training_strategy,
877881
quick_scale=quick_scale,
@@ -911,7 +915,7 @@ def pairwise(
911915
batch_size: int = None,
912916
training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
913917
is_sig_zerovar: bool = True,
914-
quick_scale: bool = None,
918+
quick_scale: bool = False,
915919
dtype="float64",
916920
pval_correction: str = "global",
917921
keep_full_test_objs: bool = False,
@@ -1082,7 +1086,7 @@ def pairwise(
10821086
grouping == g2
10831087
))[0]
10841088
de_test_temp = two_sample(
1085-
data=data[idx],
1089+
data=data[idx, :],
10861090
grouping=grouping[idx],
10871091
as_numeric=as_numeric,
10881092
test=test,
@@ -1100,7 +1104,7 @@ def pairwise(
11001104
pvals[i, j] = de_test_temp.pval
11011105
pvals[j, i] = pvals[i, j]
11021106
logfc[i, j] = de_test_temp.log_fold_change()
1103-
logfc[j, i] = - logfc[i, j]
1107+
logfc[j, i] = -logfc[i, j]
11041108
if keep_full_test_objs:
11051109
tests[i, j] = de_test_temp
11061110
tests[j, i] = de_test_temp

diffxpy/unit_test/test_pairwise.py

Lines changed: 88 additions & 204 deletions
Original file line numberDiff line numberDiff line change
@@ -4,265 +4,149 @@
44
import pandas as pd
55
import scipy.stats as stats
66

7-
from batchglm.api.models.glm_nb import Simulator
87
import diffxpy.api as de
98

109

11-
class TestPairwiseNull(unittest.TestCase):
10+
class _TestPairwiseNull:
1211

13-
def test_null_distribution_ztest(self, n_cells: int = 2000, n_genes: int = 100, n_groups=2):
14-
"""
15-
Test if de.wald() generates a uniform p-value distribution
16-
if it is given data simulated based on the null model. Returns the p-value
17-
of the two-side Kolmgorov-Smirnov test for equality of the observed
18-
p-value distriubution and a uniform distribution.
12+
noise_model: str
1913

20-
:param n_cells: Number of cells to simulate (number of observations per test).
21-
:param n_genes: Number of genes to simulate (number of tests).
22-
"""
23-
logging.getLogger("tensorflow").setLevel(logging.ERROR)
24-
logging.getLogger("batchglm").setLevel(logging.WARNING)
25-
logging.getLogger("diffxpy").setLevel(logging.WARNING)
14+
def _prepate_data(
15+
self,
16+
n_cells: int,
17+
n_genes: int,
18+
n_groups: int
19+
):
20+
if self.noise_model == "nb":
21+
from batchglm.api.models.glm_nb import Simulator
22+
rand_fn_loc = lambda shape: np.random.uniform(0.1, 1, shape)
23+
rand_fn_scale = lambda shape: np.random.uniform(0.5, 1, shape)
24+
elif self.noise_model == "norm" or self.noise_model is None:
25+
from batchglm.api.models.glm_norm import Simulator
26+
rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
27+
rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
28+
else:
29+
raise ValueError("noise model %s not recognized" % self.noise_model)
2630

2731
sim = Simulator(num_observations=n_cells, num_features=n_genes)
2832
sim.generate_sample_description(num_batches=0, num_conditions=0)
29-
sim.generate()
33+
sim.generate_params(
34+
rand_fn_loc=rand_fn_loc,
35+
rand_fn_scale=rand_fn_scale
36+
)
37+
sim.generate_data()
3038

3139
random_sample_description = pd.DataFrame({
3240
"condition": np.random.randint(n_groups, size=sim.nobs)
3341
})
34-
35-
test = de.test.pairwise(
36-
data=sim.x,
37-
grouping="condition",
38-
test="z-test",
39-
noise_model="nb",
40-
sample_description=random_sample_description,
41-
dtype="float64"
42-
)
43-
summary = test.summary()
44-
45-
# Compare p-value distribution under null model against uniform distribution.
46-
pval_h0 = stats.kstest(test.pval[~np.eye(test.pval.shape[0]).astype(bool)].flatten(), 'uniform').pvalue
47-
48-
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
49-
assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)
50-
51-
return True
52-
53-
def test_null_distribution_z_lazy(self, n_cells: int = 2000, n_genes: int = 100):
42+
return sim, random_sample_description
43+
44+
def _test_null_distribution_basic(
45+
self,
46+
test: str,
47+
lazy: bool,
48+
quick_scale: bool = False,
49+
n_cells: int = 3000,
50+
n_genes: int = 200,
51+
n_groups: int = 3
52+
):
5453
"""
55-
Test if de.pairwise() generates a uniform p-value distribution for lazy z-tests
54+
Test if de.wald() generates a uniform p-value distribution
5655
if it is given data simulated based on the null model. Returns the p-value
57-
of the two-side Kolmgorov-Smirnov test for equality of the observed
56+
of the two-side Kolmgorov-Smirnov test for equality of the observed
5857
p-value distriubution and a uniform distribution.
5958
6059
:param n_cells: Number of cells to simulate (number of observations per test).
6160
:param n_genes: Number of genes to simulate (number of tests).
6261
"""
63-
logging.getLogger("tensorflow").setLevel(logging.ERROR)
64-
logging.getLogger("batchglm").setLevel(logging.WARNING)
65-
logging.getLogger("diffxpy").setLevel(logging.WARNING)
66-
67-
sim = Simulator(num_observations=n_cells, num_features=n_genes)
68-
sim.generate_sample_description(num_batches=0, num_conditions=0)
69-
sim.generate()
70-
71-
random_sample_description = pd.DataFrame({
72-
"condition": np.random.randint(4, size=sim.nobs)
73-
})
74-
62+
sim, sample_description = self._prepate_data(
63+
n_cells=n_cells,
64+
n_genes=n_genes,
65+
n_groups=n_groups
66+
)
7567
test = de.test.pairwise(
76-
data=sim.x,
68+
data=sim.input_data,
69+
sample_description=sample_description,
7770
grouping="condition",
78-
test='z-test',
79-
lazy=True,
80-
noise_model="nb",
81-
pval_correction="global",
82-
quick_scale=True,
83-
sample_description=random_sample_description,
84-
dtype="float64"
71+
test=test,
72+
lazy=lazy,
73+
quick_scale=quick_scale,
74+
noise_model=self.noise_model
8575
)
76+
_ = test.summary()
8677

8778
# Compare p-value distribution under null model against uniform distribution.
88-
pvals = test.pval_pairs(groups0=0, groups1=1)
89-
pval_h0 = stats.kstest(pvals.flatten(), 'uniform').pvalue
79+
if lazy:
80+
pval_h0 = stats.kstest(test.pval_pairs(groups0=0, groups1=1).flatten(), 'uniform').pvalue
81+
else:
82+
pval_h0 = stats.kstest(test.pval[0, 1, :].flatten(), 'uniform').pvalue
9083

9184
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
9285
assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)
93-
9486
return True
9587

96-
def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100, n_groups=2):
97-
"""
98-
Test if de.wald() generates a uniform p-value distribution
99-
if it is given data simulated based on the null model. Returns the p-value
100-
of the two-side Kolmgorov-Smirnov test for equality of the observed
101-
p-value distriubution and a uniform distribution.
10288

103-
:param n_cells: Number of cells to simulate (number of observations per test).
104-
:param n_genes: Number of genes to simulate (number of tests).
105-
"""
89+
class TestPairwiseNullStandard(unittest.TestCase, _TestPairwiseNull):
90+
91+
def test_null_distribution_ttest(self):
10692
logging.getLogger("tensorflow").setLevel(logging.ERROR)
10793
logging.getLogger("batchglm").setLevel(logging.WARNING)
10894
logging.getLogger("diffxpy").setLevel(logging.WARNING)
10995

110-
sim = Simulator(num_observations=n_cells, num_features=n_genes)
111-
sim.generate_sample_description(num_batches=0, num_conditions=0)
112-
sim.generate()
113-
114-
random_sample_description = pd.DataFrame({
115-
"condition": np.random.randint(n_groups, size=sim.nobs)
116-
})
117-
118-
test = de.test.pairwise(
119-
data=sim.x,
120-
grouping="condition",
121-
test="lrt",
122-
noise_model="nb",
123-
sample_description=random_sample_description,
124-
dtype="float64"
125-
)
126-
127-
# Compare p-value distribution under null model against uniform distribution.
128-
pval_h0 = stats.kstest(test.pval[~np.eye(test.pval.shape[0]).astype(bool)].flatten(), 'uniform').pvalue
96+
np.random.seed(1)
97+
self.noise_model = None
98+
self._test_null_distribution_basic(test="t-test", lazy=False)
12999

130-
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
131-
assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)
132-
133-
return True
134-
135-
def test_null_distribution_ttest(self, n_cells: int = 2000, n_genes: int = 10000, n_groups=2):
136-
"""
137-
Test if de.wald() generates a uniform p-value distribution
138-
if it is given data simulated based on the null model. Returns the p-value
139-
of the two-side Kolmgorov-Smirnov test for equality of the observed
140-
p-value distriubution and a uniform distribution.
141-
142-
:param n_cells: Number of cells to simulate (number of observations per test).
143-
:param n_genes: Number of genes to simulate (number of tests).
144-
"""
100+
def test_null_distribution_rank(self):
145101
logging.getLogger("tensorflow").setLevel(logging.ERROR)
146102
logging.getLogger("batchglm").setLevel(logging.WARNING)
147103
logging.getLogger("diffxpy").setLevel(logging.WARNING)
148104

149-
sim = Simulator(num_observations=n_cells, num_features=n_genes)
150-
sim.generate_sample_description(num_batches=0, num_conditions=0)
151-
sim.generate()
152-
153-
random_sample_description = pd.DataFrame({
154-
"condition": np.random.randint(n_groups, size=sim.nobs)
155-
})
156-
157-
test = de.test.pairwise(
158-
data=sim.x,
159-
grouping="condition",
160-
test="t-test",
161-
sample_description=random_sample_description,
162-
)
163-
summary = test.summary()
164-
165-
# Compare p-value distribution under null model against uniform distribution.
166-
pval_h0 = stats.kstest(test.pval[~np.eye(test.pval.shape[0]).astype(bool)].flatten(), 'uniform').pvalue
167-
168-
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
169-
assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)
105+
np.random.seed(1)
106+
self.noise_model = None
107+
self._test_null_distribution_basic(test="rank", lazy=False)
170108

171-
return True
172109

173-
def test_null_distribution_wilcoxon(self, n_cells: int = 2000, n_genes: int = 10000, n_groups=2):
174-
"""
175-
Test if de.wald() generates a uniform p-value distribution
176-
if it is given data simulated based on the null model. Returns the p-value
177-
of the two-side Kolmgorov-Smirnov test for equality of the observed
178-
p-value distriubution and a uniform distribution.
110+
class TestPairwiseNullNb(unittest.TestCase, _TestPairwiseNull):
179111

180-
:param n_cells: Number of cells to simulate (number of observations per test).
181-
:param n_genes: Number of genes to simulate (number of tests).
182-
"""
112+
def test_null_distribution_ztest(self):
183113
logging.getLogger("tensorflow").setLevel(logging.ERROR)
184114
logging.getLogger("batchglm").setLevel(logging.WARNING)
185115
logging.getLogger("diffxpy").setLevel(logging.WARNING)
186116

187-
sim = Simulator(num_observations=n_cells, num_features=n_genes)
188-
sim.generate_sample_description(num_batches=0, num_conditions=0)
189-
sim.generate()
190-
191-
random_sample_description = pd.DataFrame({
192-
"condition": np.random.randint(n_groups, size=sim.nobs)
193-
})
194-
195-
test = de.test.pairwise(
196-
data=sim.x,
197-
grouping="condition",
198-
test="wilcoxon",
199-
sample_description=random_sample_description,
200-
)
201-
summary = test.summary()
202-
203-
# Compare p-value distribution under null model against uniform distribution.
204-
pval_h0 = stats.kstest(test.pval[~np.eye(test.pval.shape[0]).astype(bool)].flatten(), 'uniform').pvalue
117+
np.random.seed(1)
118+
self.noise_model = "nb"
119+
self._test_null_distribution_basic(test="z-test", lazy=False, quick_scale=False)
120+
self._test_null_distribution_basic(test="z-test", lazy=False, quick_scale=True)
205121

206-
logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
207-
assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)
208-
209-
return True
210-
211-
212-
class TestPairwiseDE(unittest.TestCase):
213-
214-
def test_ztest_de(self, n_cells: int = 2000, n_genes: int = 500):
215-
"""
216-
Test if de.lrt() generates a uniform p-value distribution
217-
if it is given data simulated based on the null model. Returns the p-value
218-
of the two-side Kolmgorov-Smirnov test for equality of the observed
219-
p-value distriubution and a uniform distribution.
220-
221-
:param n_cells: Number of cells to simulate (number of observations per test).
222-
:param n_genes: Number of genes to simulate (number of tests).
223-
"""
122+
def test_null_distribution_ztest_lazy(self):
224123
logging.getLogger("tensorflow").setLevel(logging.ERROR)
225124
logging.getLogger("batchglm").setLevel(logging.WARNING)
226125
logging.getLogger("diffxpy").setLevel(logging.WARNING)
227126

228-
num_non_de = n_genes // 2
229-
sim = Simulator(num_observations=n_cells, num_features=n_genes)
230-
sim.generate_sample_description(num_batches=0, num_conditions=2)
231-
# simulate: coefficients ~ log(N(1, 0.5)).
232-
# re-sample if N(1, 0.5) <= 0
233-
sim.generate_params(rand_fn=lambda shape: 1 + stats.truncnorm.rvs(-1 / 0.5, np.infty, scale=0.5, size=shape))
234-
sim.params["a"][1, :num_non_de] = 0
235-
sim.params["b"][1, :num_non_de] = 0
236-
sim.params["isDE"] = ("features",), np.arange(n_genes) >= num_non_de
237-
sim.generate_data()
127+
np.random.seed(1)
128+
self.noise_model = "nb"
129+
self._test_null_distribution_basic(test="z-test", lazy=True, quick_scale=False)
130+
self._test_null_distribution_basic(test="z-test", lazy=True, quick_scale=True)
238131

239-
sample_description = sim.sample_description
132+
def test_null_distribution_wald(self):
133+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
134+
logging.getLogger("batchglm").setLevel(logging.WARNING)
135+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
240136

241-
test = de.test.pairwise(
242-
data=sim.x,
243-
grouping="condition",
244-
test="z-test",
245-
noise_model="nb",
246-
sample_description=sample_description,
247-
)
248-
summary = test.summary()
137+
np.random.seed(1)
138+
self.noise_model = "nb"
139+
self._test_null_distribution_basic(test="wald", lazy=False, quick_scale=False)
140+
self._test_null_distribution_basic(test="wald", lazy=False, quick_scale=True)
249141

250-
frac_nonde_sig = np.mean(
251-
np.sum(test.qval[~np.eye(test.pval.shape[0]).astype(bool), :num_non_de] < 0.05) /
252-
(2 * num_non_de)
253-
)
254-
frac_de_sig = np.mean(
255-
np.sum(test.qval[~np.eye(test.pval.shape[0]).astype(bool), num_non_de:] < 0.05) /
256-
(2 * (n_genes - num_non_de))
257-
)
258-
logging.getLogger("diffxpy").info('fraction of non-DE genes with q-value < 0.05: %.1f%%' %
259-
str(np.round(100. * frac_nonde_sig, 3)))
260-
logging.getLogger("diffxpy").info('fraction of DE genes with q-value < 0.05: %.1f%%' %
261-
str(np.round(100. * frac_de_sig, 3)))
142+
def test_null_distribution_lrt(self):
143+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
144+
logging.getLogger("batchglm").setLevel(logging.WARNING)
145+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
262146

263-
assert frac_de_sig > 0.5, "too many DE"
264-
assert frac_nonde_sig < 0.5, "too many non-DE"
265-
return True
147+
np.random.seed(1)
148+
self.noise_model = "nb"
149+
self._test_null_distribution_basic(test="lrt", lazy=False, quick_scale=False)
266150

267151

268152
if __name__ == '__main__':

0 commit comments

Comments
 (0)