theislab
diff --git a/‎LICENSE‎
Lines changed: 1 addition & 1 deletion b/‎LICENSE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎NOTICE‎
Lines changed: 2 additions & 0 deletions b/‎NOTICE‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎diffxpy/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎diffxpy/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎diffxpy/api/test.py‎
Lines changed: 0 additions & 1 deletion b/‎diffxpy/api/test.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎diffxpy/api/utils.py‎
Lines changed: 4 additions & 1 deletion b/‎diffxpy/api/utils.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎diffxpy/enrichment/enrich.py‎
Lines changed: 42 additions & 41 deletions b/‎diffxpy/enrichment/enrich.py‎
Lines changed: 42 additions & 41 deletions
diff --git a/‎diffxpy/testing/__init__.py‎
Lines changed: 0 additions & 1 deletion b/‎diffxpy/testing/__init__.py‎
Lines changed: 0 additions & 1 deletion
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2018, David S. Fischer, Florian R. Hölzlwimmer.
+Copyright (c) 2018, David S. Fischer, Florian R. Hölzlwimmer, Theis Lab (theislab).
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 
@@ -0,0 +1,2 @@
+The file ./docs/conf.py was adapted from ttps://github.com/theislab/scanpy/scanpy/conf.py and is licensed by the
+scanpy project (F. Alexander Wolf, P. Angerer, Theis Lab) as described in the file.
@@ -4,3 +4,11 @@
 del get_versions
 
 from .log_cfg import logger, unconfigure_logging, enable_logging
+
+__author__ = ', '.join([
+    'David Sebastian Fischer',
+    'Florian Hölzlwimmer'
+])
+__email__ = ', '.join([
+    'david.fischer@helmholtz-muenchen.de'
+])
@@ -1,3 +1,2 @@
 from diffxpy.testing import lrt, wald, t_test, rank_test, two_sample, pairwise, \
     versus_rest, partition, continuous_1d
-from diffxpy.testing import design_matrix, coef_names
@@ -1 +1,4 @@
-import batchglm.data as data_utils
+from diffxpy.testing.utils import constraint_matrix_from_string, constraint_matrix_from_dict, \
+    constraint_system_from_star
+from diffxpy.testing.utils import design_matrix, design_matrix_from_xarray, design_matrix_from_anndata
+from diffxpy.testing.utils import view_coef_names, preview_coef_names
@@ -7,7 +7,6 @@
 from ..testing import correction
 from ..testing.det import _DifferentialExpressionTest
 
-logger = logging.getLogger(__name__)
 
 class RefSets:
     """
@@ -42,15 +41,19 @@ def clean(self, ids):
 
     def __init__(self, sets=None, fn=None, type='gmt'):
         if sets is not None:
-            self.load_sets(sets, type=type)
-            self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
+            if len(sets) > 0:
+                self.load_sets(sets, type=type)
+                self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
+            else:
+                self.sets = []
+                self._genes = np.array([])
         elif fn is not None:
             self.read_from_file(fn=fn, type=type)
             self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
         else:
             self.sets = []
             self._genes = np.array([])
-        self._ids = [x.id for x in self.sets]
+        self._ids = np.array([x.id for x in self.sets])
         self._set_lens = np.array([x.len for x in self.sets])
         self.genes_discarded = None
 
@@ -113,7 +116,7 @@ def add(self, id: str, source: str, gene_ids: list):
         self.sets.append(self._Set(id=id, source=source, gene_ids=gene_ids))
         # Update summary variables:
         self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
-        self._ids = [x.id for x in self.sets]
+        self._ids = np.array([x.id for x in self.sets])
         self._set_lens = np.array([x.len for x in self.sets])
 
     ## Processing functions.
@@ -165,7 +168,7 @@ def get_set(self, id):
         """ 
         Return the set with a given set identifier.
         """
-        return self.sets[self._ids.index(id)]
+        return self.sets[self._ids.tolist().index(id)]
 
     ## Overlap functions.
 
@@ -191,9 +194,9 @@ def overlap(self, enq_set: set, set_id=None):
 def test(
         ref: RefSets,
         det: Union[_DifferentialExpressionTest, None] = None,
-        pval: Union[np.array, None] = None,
+        scores: Union[np.array, None] = None,
         gene_ids: Union[list, None] = None,
-        de_threshold=0.05,
+        threshold=0.05,
         incl_all_zero=False,
         all_ids=None,
         clean_ref=False,
@@ -205,38 +208,31 @@ def test(
     nice doc string and that the call to this is de.enrich.test which
     makes more sense to me than de.enrich.Enrich.
 
-    :param RefSets:
-        The annotated gene sets against which enrichment is tested.
-    :param DETest:
-        The differential expression results object which is tested
+    :param ref: The annotated gene sets against which enrichment is tested.
+    :param det: The differential expression results object which is tested
         for enrichment in the gene sets.
-    :param pval:
-        Alternative to DETest, vector of p-values for differential expression.
-    :param gene_ids:
-        If pval was supplied instead of DETest, use gene_ids to supply the
+    :param scores: Alternative to DETest, vector of scores (scalar per gene) which are then
+        used to discretize gene list. This can for example be corrected p-values from a differential expression
+        test, in that case the parameter threshold would be a significance threshold.
+    :param gene_ids: If pval was supplied instead of DETest, use gene_ids to supply the
         vector of gene identifiers (strings) that correspond to the p-values
-        which can be matched against the identifieres in the sets in RefSets.
-    :param de_threshold:
-        Significance threshold at which a differential test (a multiple-testing 
-        corrected p-value) is called siginficant. T
-    :param incl_all_zero:
-        Wehther to include genes in gene universe which were all zero.
-    :param all_ids:
-        Set of all gene identifiers, this is used as the background set in the
+        which can be matched against the identifiers in the sets in RefSets.
+    :param threshold: Threshold of parameter scores at which a gene is included as a hit: In the case
+        of differential test p-values in scores, threshold is the significance threshold.
+    :param incl_all_zero: Wehther to include genes in gene universe which were all zero.
+    :param all_ids: Set of all gene identifiers, this is used as the background set in the
         hypergeometric test. Only supply this if not all genes were tested
         and are supplied above in DETest or gene_ids.
-    :param clean_ref:
-        Whether or not to only retain gene identifiers in RefSets that occur in 
+    :param clean_ref: Whether or not to only retain gene identifiers in RefSets that occur in
         the background set of identifiers supplied here through all_ids.
-    :param capital:
-        Make all gene IDs captial.
+    :param capital: Make all gene IDs captial.
     """
     return Enrich(
         ref=ref,
         det=det,
-        pval=pval,
+        scores=scores,
         gene_ids=gene_ids,
-        de_threshold=de_threshold,
+        threshold=threshold,
         incl_all_zero=incl_all_zero,
         all_ids=all_ids,
         clean_ref=clean_ref,
@@ -252,9 +248,9 @@ def __init__(
             self,
             ref: RefSets,
             det: Union[_DifferentialExpressionTest, None],
-            pval: Union[np.array, None],
-            gene_ids: Union[list, None],
-            de_threshold,
+            scores: Union[np.array, None],
+            gene_ids: Union[list, np.ndarray, None],
+            threshold,
             incl_all_zero,
             all_ids,
             clean_ref,
@@ -263,6 +259,8 @@ def __init__(
         self._n_overlaps = None
         self._pval_enrich = None
         self._qval_enrich = None
+        if isinstance(gene_ids, list):
+            gene_ids = np.asarray(gene_ids)
         # Load multiple-testing-corrected differential expression
         # p-values from differential expression output.
         if det is not None:
@@ -273,24 +271,24 @@ def __init__(
                 idx_not_all_zero = np.where(np.logical_not(det.summary()["zero_mean"].values))[0]
                 self._qval_de = det.qval[idx_not_all_zero]
                 self._gene_ids = det.gene_ids[idx_not_all_zero]
-        elif pval is not None and gene_ids is not None:
-            self._qval_de = np.asarray(pval)
+        elif scores is not None and gene_ids is not None:
+            self._qval_de = np.asarray(scores)
             self._gene_ids = gene_ids
         else:
             raise ValueError('Supply either DETest or pval and gene_ids to Enrich().')
         # Take out NA genes labels:
         # Select significant genes based on user defined threshold.
         if any([x is np.nan for x in self._gene_ids]):
             idx_notnan = np.where([x is not np.nan for x in self._gene_ids])[0]
-            logger.info(
+            logging.getLogger("diffxpy").info(
                 " Discarded %i nan gene ids, leaving %i genes.",
                 len(self._gene_ids) - len(idx_notnan),
                 len(idx_notnan)
             )
             self._qval_de = self._qval_de[idx_notnan]
             self._gene_ids = self._gene_ids[idx_notnan]
 
-        self._significant_de = self._qval_de <= de_threshold
+        self._significant_de = self._qval_de <= threshold
         self._significant_ids = set(self._gene_ids[np.where(self._significant_de)[0]])
         if all_ids is not None:
             self._all_ids = set(all_ids)
@@ -303,7 +301,7 @@ def __init__(
             self._significant_ids = set([x.upper() for x in self._significant_ids])
 
         # Generate diagnostic statistic of number of possible overlaps in total.
-        logger.info(
+        logging.getLogger("diffxpy").info(
             " %i overlaps found between refset (%i) and provided gene list (%i).",
             len(set(self._all_ids).intersection(set(ref._genes))),
             len(ref._genes),
@@ -318,7 +316,7 @@ def __init__(
         # Print if there are empty sets.
         idx_nonempty = np.where([len(x.genes) > 0 for x in self.RefSets.sets])[0]
         if len(self.RefSets.sets) - len(idx_nonempty) > 0:
-            logger.info(
+            logging.getLogger("diffxpy").info(
                 " Found %i empty sets, removing those.",
                 len(self.RefSets.sets) - len(idx_nonempty)
             )
@@ -389,7 +387,10 @@ def significant_sets(self, threshold=0.05) -> list:
         """
         Return significant sets from gene set enrichement analysis as an output table.
         """
-        return self.RefSets.subset(idx=np.where(self.qval <= threshold)[0])
+        sig_sets = np.where(self.qval <= threshold)[0]
+        if len(sig_sets) == 0:
+            logging.getLogger("diffxpy").info("no significant sets found")
+        return self.RefSets.subset(idx=sig_sets)
 
     def significant_set_ids(self, threshold=0.05) -> np.array:
         """
@@ -424,4 +425,4 @@ def set_summary(self, id: str):
 
         :return: Slice of summary table.
         """
-        return self.summary(sort=False).iloc[self.RefSets._ids.index(id), :]
+        return self.summary(sort=False).iloc[self.RefSets._ids.tolist().index(id), :]
@@ -1,3 +1,2 @@
 from .tests import lrt, wald, t_test, rank_test, two_sample, pairwise, \
     versus_rest, partition, continuous_1d
-from .utils import design_matrix, coef_names
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+The file ./docs/conf.py was adapted from ttps://github.com/theislab/scanpy/scanpy/conf.py and is licensed by the`
	`2`	`+scanpy project (F. Alexander Wolf, P. Angerer, Theis Lab) as described in the file.`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,2 @@`
`1`	`1`	`from diffxpy.testing import lrt, wald, t_test, rank_test, two_sample, pairwise, \`
`2`	`2`	`versus_rest, partition, continuous_1d`
`3`		`-from diffxpy.testing import design_matrix, coef_names`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,2 @@`
`1`	`1`	`from .tests import lrt, wald, t_test, rank_test, two_sample, pairwise, \`
`2`	`2`	`versus_rest, partition, continuous_1d`
`3`		`-from .utils import design_matrix, coef_names`