Skip to content
This repository was archived by the owner on Oct 21, 2025. It is now read-only.

Commit 43fb819

Browse files
Merge pull request #97 from theislab/dev
Dev
2 parents 0054f90 + 395e152 commit 43fb819

23 files changed

+2865
-1124
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
BSD 3-Clause License
22

3-
Copyright (c) 2018, David S. Fischer, Florian R. Hölzlwimmer.
3+
Copyright (c) 2018, David S. Fischer, Florian R. Hölzlwimmer, Theis Lab (theislab).
44
All rights reserved.
55

66
Redistribution and use in source and binary forms, with or without

NOTICE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The file ./docs/conf.py was adapted from https://github.com/theislab/scanpy/scanpy/conf.py and is licensed by the
2+
scanpy project (F. Alexander Wolf, P. Angerer, Theis Lab) as described in the file.

diffxpy/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,11 @@
44
del get_versions
55

66
from .log_cfg import logger, unconfigure_logging, enable_logging
7+
8+
__author__ = ', '.join([
9+
'David Sebastian Fischer',
10+
'Florian Hölzlwimmer'
11+
])
12+
__email__ = ', '.join([
13+
'david.fischer@helmholtz-muenchen.de'
14+
])

diffxpy/api/test.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
from diffxpy.testing import lrt, wald, t_test, rank_test, two_sample, pairwise, \
22
versus_rest, partition, continuous_1d
3-
from diffxpy.testing import design_matrix, coef_names

diffxpy/api/utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
import batchglm.data as data_utils
1+
from diffxpy.testing.utils import constraint_matrix_from_string, constraint_matrix_from_dict, \
2+
constraint_system_from_star
3+
from diffxpy.testing.utils import design_matrix, design_matrix_from_xarray, design_matrix_from_anndata
4+
from diffxpy.testing.utils import view_coef_names, preview_coef_names

diffxpy/enrichment/enrich.py

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from ..testing import correction
88
from ..testing.det import _DifferentialExpressionTest
99

10-
logger = logging.getLogger(__name__)
1110

1211
class RefSets:
1312
"""
@@ -42,15 +41,19 @@ def clean(self, ids):
4241

4342
def __init__(self, sets=None, fn=None, type='gmt'):
4443
if sets is not None:
45-
self.load_sets(sets, type=type)
46-
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
44+
if len(sets) > 0:
45+
self.load_sets(sets, type=type)
46+
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
47+
else:
48+
self.sets = []
49+
self._genes = np.array([])
4750
elif fn is not None:
4851
self.read_from_file(fn=fn, type=type)
4952
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
5053
else:
5154
self.sets = []
5255
self._genes = np.array([])
53-
self._ids = [x.id for x in self.sets]
56+
self._ids = np.array([x.id for x in self.sets])
5457
self._set_lens = np.array([x.len for x in self.sets])
5558
self.genes_discarded = None
5659

@@ -113,7 +116,7 @@ def add(self, id: str, source: str, gene_ids: list):
113116
self.sets.append(self._Set(id=id, source=source, gene_ids=gene_ids))
114117
# Update summary variables:
115118
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
116-
self._ids = [x.id for x in self.sets]
119+
self._ids = np.array([x.id for x in self.sets])
117120
self._set_lens = np.array([x.len for x in self.sets])
118121

119122
## Processing functions.
@@ -165,7 +168,7 @@ def get_set(self, id):
165168
"""
166169
Return the set with a given set identifier.
167170
"""
168-
return self.sets[self._ids.index(id)]
171+
return self.sets[self._ids.tolist().index(id)]
169172

170173
## Overlap functions.
171174

@@ -191,9 +194,9 @@ def overlap(self, enq_set: set, set_id=None):
191194
def test(
192195
ref: RefSets,
193196
det: Union[_DifferentialExpressionTest, None] = None,
194-
pval: Union[np.array, None] = None,
197+
scores: Union[np.array, None] = None,
195198
gene_ids: Union[list, None] = None,
196-
de_threshold=0.05,
199+
threshold=0.05,
197200
incl_all_zero=False,
198201
all_ids=None,
199202
clean_ref=False,
@@ -205,38 +208,31 @@ def test(
205208
nice doc string and that the call to this is de.enrich.test which
206209
makes more sense to me than de.enrich.Enrich.
207210
208-
:param RefSets:
209-
The annotated gene sets against which enrichment is tested.
210-
:param DETest:
211-
The differential expression results object which is tested
211+
:param ref: The annotated gene sets against which enrichment is tested.
212+
:param det: The differential expression results object which is tested
212213
for enrichment in the gene sets.
213-
:param pval:
214-
Alternative to DETest, vector of p-values for differential expression.
215-
:param gene_ids:
216-
If pval was supplied instead of DETest, use gene_ids to supply the
214+
:param scores: Alternative to DETest, vector of scores (scalar per gene) which are then
215+
used to discretize gene list. This can for example be corrected p-values from a differential expression
216+
test, in that case the parameter threshold would be a significance threshold.
217+
:param gene_ids: If scores was supplied instead of det, use gene_ids to supply the
217218
vector of gene identifiers (strings) that correspond to the p-values
218-
which can be matched against the identifieres in the sets in RefSets.
219-
:param de_threshold:
220-
Significance threshold at which a differential test (a multiple-testing
221-
corrected p-value) is called siginficant. T
222-
:param incl_all_zero:
223-
Wehther to include genes in gene universe which were all zero.
224-
:param all_ids:
225-
Set of all gene identifiers, this is used as the background set in the
219+
which can be matched against the identifiers in the sets in RefSets.
220+
:param threshold: Threshold of parameter scores at which a gene is included as a hit: In the case
221+
of differential test p-values in scores, threshold is the significance threshold.
222+
:param incl_all_zero: Whether to include genes in gene universe which were all zero.
223+
:param all_ids: Set of all gene identifiers, this is used as the background set in the
226224
hypergeometric test. Only supply this if not all genes were tested
227225
and are supplied above in DETest or gene_ids.
228-
:param clean_ref:
229-
Whether or not to only retain gene identifiers in RefSets that occur in
226+
:param clean_ref: Whether or not to only retain gene identifiers in RefSets that occur in
230227
the background set of identifiers supplied here through all_ids.
231-
:param capital:
232-
Make all gene IDs captial.
228+
:param capital: Make all gene IDs capital.
233229
"""
234230
return Enrich(
235231
ref=ref,
236232
det=det,
237-
pval=pval,
233+
scores=scores,
238234
gene_ids=gene_ids,
239-
de_threshold=de_threshold,
235+
threshold=threshold,
240236
incl_all_zero=incl_all_zero,
241237
all_ids=all_ids,
242238
clean_ref=clean_ref,
@@ -252,9 +248,9 @@ def __init__(
252248
self,
253249
ref: RefSets,
254250
det: Union[_DifferentialExpressionTest, None],
255-
pval: Union[np.array, None],
256-
gene_ids: Union[list, None],
257-
de_threshold,
251+
scores: Union[np.array, None],
252+
gene_ids: Union[list, np.ndarray, None],
253+
threshold,
258254
incl_all_zero,
259255
all_ids,
260256
clean_ref,
@@ -263,6 +259,8 @@ def __init__(
263259
self._n_overlaps = None
264260
self._pval_enrich = None
265261
self._qval_enrich = None
262+
if isinstance(gene_ids, list):
263+
gene_ids = np.asarray(gene_ids)
266264
# Load multiple-testing-corrected differential expression
267265
# p-values from differential expression output.
268266
if det is not None:
@@ -273,24 +271,24 @@ def __init__(
273271
idx_not_all_zero = np.where(np.logical_not(det.summary()["zero_mean"].values))[0]
274272
self._qval_de = det.qval[idx_not_all_zero]
275273
self._gene_ids = det.gene_ids[idx_not_all_zero]
276-
elif pval is not None and gene_ids is not None:
277-
self._qval_de = np.asarray(pval)
274+
elif scores is not None and gene_ids is not None:
275+
self._qval_de = np.asarray(scores)
278276
self._gene_ids = gene_ids
279277
else:
280278
raise ValueError('Supply either DETest or pval and gene_ids to Enrich().')
281279
# Take out NA genes labels:
282280
# Select significant genes based on user defined threshold.
283281
if any([x is np.nan for x in self._gene_ids]):
284282
idx_notnan = np.where([x is not np.nan for x in self._gene_ids])[0]
285-
logger.info(
283+
logging.getLogger("diffxpy").info(
286284
" Discarded %i nan gene ids, leaving %i genes.",
287285
len(self._gene_ids) - len(idx_notnan),
288286
len(idx_notnan)
289287
)
290288
self._qval_de = self._qval_de[idx_notnan]
291289
self._gene_ids = self._gene_ids[idx_notnan]
292290

293-
self._significant_de = self._qval_de <= de_threshold
291+
self._significant_de = self._qval_de <= threshold
294292
self._significant_ids = set(self._gene_ids[np.where(self._significant_de)[0]])
295293
if all_ids is not None:
296294
self._all_ids = set(all_ids)
@@ -303,7 +301,7 @@ def __init__(
303301
self._significant_ids = set([x.upper() for x in self._significant_ids])
304302

305303
# Generate diagnostic statistic of number of possible overlaps in total.
306-
logger.info(
304+
logging.getLogger("diffxpy").info(
307305
" %i overlaps found between refset (%i) and provided gene list (%i).",
308306
len(set(self._all_ids).intersection(set(ref._genes))),
309307
len(ref._genes),
@@ -318,7 +316,7 @@ def __init__(
318316
# Print if there are empty sets.
319317
idx_nonempty = np.where([len(x.genes) > 0 for x in self.RefSets.sets])[0]
320318
if len(self.RefSets.sets) - len(idx_nonempty) > 0:
321-
logger.info(
319+
logging.getLogger("diffxpy").info(
322320
" Found %i empty sets, removing those.",
323321
len(self.RefSets.sets) - len(idx_nonempty)
324322
)
@@ -389,7 +387,10 @@ def significant_sets(self, threshold=0.05) -> list:
389387
"""
390388
Return significant sets from gene set enrichment analysis as an output table.
391389
"""
392-
return self.RefSets.subset(idx=np.where(self.qval <= threshold)[0])
390+
sig_sets = np.where(self.qval <= threshold)[0]
391+
if len(sig_sets) == 0:
392+
logging.getLogger("diffxpy").info("no significant sets found")
393+
return self.RefSets.subset(idx=sig_sets)
393394

394395
def significant_set_ids(self, threshold=0.05) -> np.array:
395396
"""
@@ -424,4 +425,4 @@ def set_summary(self, id: str):
424425
425426
:return: Slice of summary table.
426427
"""
427-
return self.summary(sort=False).iloc[self.RefSets._ids.index(id), :]
428+
return self.summary(sort=False).iloc[self.RefSets._ids.tolist().index(id), :]

diffxpy/testing/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
from .tests import lrt, wald, t_test, rank_test, two_sample, pairwise, \
22
versus_rest, partition, continuous_1d
3-
from .utils import design_matrix, coef_names

0 commit comments

Comments
 (0)