This repository was archived by the owner on Oct 21, 2025. It is now read-only.

Commit 97340f7 (parent 6f4ebc6)

Adapted diffxpy to the new batchglm interface, including constraints.
The LRT is still to come; unit tests for constraints work.

File tree: 13 files changed, +117 -188 lines

diffxpy/api/__init__.py (1 addition, 0 deletions)

@@ -4,3 +4,4 @@
 from . import test
 from . import enrich
 from . import stats
+from . import utils

diffxpy/api/enrich.py (2 additions, 2 deletions)

@@ -1,2 +1,2 @@
-from ..enrichment.enrich import RefSets
-from ..enrichment.enrich import test
+from diffxpy.enrichment.enrich import RefSets
+from diffxpy.enrichment.enrich import test

diffxpy/api/stats.py (8 additions, 8 deletions)

@@ -1,10 +1,10 @@
-from ..stats.stats import hypergeom_test
-from ..stats.stats import likelihood_ratio_test
-from ..stats.stats import wald_test
-from ..stats.stats import wald_test_chisq
-from ..stats.stats import two_coef_z_test
-from ..stats.stats import wilcoxon_test
-from ..stats.stats import t_test_moments
-from ..stats.stats import t_test_raw
+from diffxpy.stats.stats import hypergeom_test
+from diffxpy.stats.stats import likelihood_ratio_test
+from diffxpy.stats.stats import wald_test
+from diffxpy.stats.stats import wald_test_chisq
+from diffxpy.stats.stats import two_coef_z_test
+from diffxpy.stats.stats import wilcoxon_test
+from diffxpy.stats.stats import t_test_moments
+from diffxpy.stats.stats import t_test_raw
 
 from ..testing.correction import correct

diffxpy/api/test.py (11 additions, 12 deletions)

@@ -1,12 +1,11 @@
-# from ..testing.base import stats
-from ..testing.base import two_sample
-from ..testing.base import lrt
-from ..testing.base import wald
-from ..testing.base import t_test
-from ..testing.base import wilcoxon
-from ..testing.base import partition
-from ..testing.base import pairwise
-from ..testing.base import versus_rest
-from ..testing.base import continuous_1d
-from ..testing.base import design_matrix
-from ..testing.base import coef_names
+from diffxpy.testing.base import two_sample
+from diffxpy.testing.base import lrt
+from diffxpy.testing.base import wald
+from diffxpy.testing.base import t_test
+from diffxpy.testing.base import wilcoxon
+from diffxpy.testing.base import partition
+from diffxpy.testing.base import pairwise
+from diffxpy.testing.base import versus_rest
+from diffxpy.testing.base import continuous_1d
+from diffxpy.testing.base import design_matrix
+from diffxpy.testing.base import coef_names

diffxpy/api/utils.py (1 addition, 0 deletions)

@@ -0,0 +1 @@
+import batchglm.data as data_utils
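
The new de.utils module simply re-exports batchglm's data helpers. A minimal usage sketch, assuming batchglm.data exposes a design_matrix(sample_description, formula=...) helper as used elsewhere in this codebase; the alias de, the "condition" covariate and the toy sample description are illustrative only:

import pandas as pd
import diffxpy.api as de

# Toy sample description; "condition" is a hypothetical covariate.
sample_description = pd.DataFrame({"condition": ["a", "a", "b", "b"]})

# data_utils is batchglm.data, re-exported by diffxpy/api/utils.py above.
dmat = de.utils.data_utils.design_matrix(
    sample_description=sample_description,
    formula="~ 1 + condition"
)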

diffxpy/pkg_constants.py (7 additions, 0 deletions)

@@ -0,0 +1,7 @@
+BATCHGLM_OPTIM_GD = True
+BATCHGLM_OPTIM_ADAM = True
+BATCHGLM_OPTIM_ADAGRAD = False
+BATCHGLM_OPTIM_RMSPROP = False
+BATCHGLM_OPTIM_NEWTON = True
+BATCHGLM_OPTIM_IRLS = True
+BATCHGLM_TERMINATION_TYPE = "by_feature"
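
These module-level switches are consumed in _fit() (see the diffxpy/testing/base.py hunks below), where they are packed into the provide_optimizers dictionary and the termination_type argument handed to the batchglm estimator. A sketch of how one might flip them before fitting; the flag names come from this commit, the alternative termination value "global" is an assumption:

from diffxpy import pkg_constants

# Also hand the ADAGRAD optimizer to batchglm (off by default in this commit).
pkg_constants.BATCHGLM_OPTIM_ADAGRAD = True
# Hypothetical alternative termination mode; "by_feature" is the commit's default.
pkg_constants.BATCHGLM_TERMINATION_TYPE = "global"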

diffxpy/testing/base.py (27 additions, 38 deletions)

@@ -5,9 +5,6 @@
 import pandas as pd
 
 import numpy as np
-# import scipy.sparse
-
-# import dask
 import xarray as xr
 
 try:
@@ -17,11 +14,12 @@
 
 import patsy
 import batchglm.data as data_utils
-from batchglm.api.models.glm import Model as GeneralizedLinearModel
+from batchglm.api.models.glm_nb import Model as GeneralizedLinearModel
 
 from ..stats import stats
 from . import correction
 from ..models.batch_bfgs.optim import Estim_BFGS
+from diffxpy import pkg_constants
 
 logger = logging.getLogger(__name__)
 
@@ -603,9 +601,7 @@ class DifferentialExpressionTestWald(_DifferentialExpressionTestSingle):
     """
 
     model_estim: _Estimation
-    sd_loc_totest: np.ndarray
     coef_loc_totest: np.ndarray
-    indep_coefs: np.ndarray
     theta_mle: np.ndarray
     theta_sd: np.ndarray
     _error_codes: np.ndarray
@@ -614,30 +610,16 @@ class DifferentialExpressionTestWald(_DifferentialExpressionTestSingle):
     def __init__(
             self,
             model_estim: _Estimation,
-            col_indices: np.ndarray,
-            indep_coefs: np.ndarray = None
+            col_indices: np.ndarray
     ):
         """
         :param model_estim:
-        :param cold_index: indices of indep_coefs to test
-        :param indep_coefs: indices of independent coefficients in coefficient vector
+        :param cold_index: indices of coefs to test
         """
         super().__init__()
 
         self.model_estim = model_estim
         self.coef_loc_totest = col_indices
-        # Note that self.indep_coefs are relevant if constraints are given
-        # and hessian is computed across independent coefficients only
-        # whereas point estimators are given for all coefficients.
-        if indep_coefs is not None:
-            self.indep_coefs = indep_coefs
-            self.sd_loc_totest = np.where([x in col_indices for x in self.indep_coefs])[0]
-        else:
-            self.indep_coefs = None
-            self.sd_loc_totest = self.coef_loc_totest
-
-        # p = self.pval
-        # q = self.qval
 
         try:
             if model_estim._error_codes is not None:
@@ -689,15 +671,15 @@ def _ll(self):
 
         :return: xr.DataArray
         """
-        return np.sum(self.model_estim.log_probs(), axis=0)
+        return np.sum(self.model_estim.log_likelihood, axis=0)
 
     def _ave(self):
         """
         Returns a xr.DataArray containing the mean expression by gene
 
         :return: xr.DataArray
         """
-        return np.mean(self.model_estim.X, axis=0)
+        return np.mean(self.X, axis=0)
 
     def _test(self):
         """
@@ -710,8 +692,8 @@ def _test(self):
         # with a normal distribution, for multiple parameters, a chi-square distribution is used.
         self.theta_mle = self.model_estim.par_link_loc[self.coef_loc_totest]
         if len(self.coef_loc_totest) == 1:
-            self.theta_mle = self.theta_mle[0]  # Make xarray one dimensinoal for stats.wald_test.
-            self.theta_sd = self.model_estim.fisher_inv[:, self.sd_loc_totest[0], self.sd_loc_totest[0]].values
+            self.theta_mle = self.theta_mle[0]  # Make xarray one dimensional for stats.wald_test.
+            self.theta_sd = self.model_estim.fisher_inv[:, self.coef_loc_totest[0], self.coef_loc_totest[0]].values
             self.theta_sd = np.nextafter(0, np.inf, out=self.theta_sd,
                                          where=self.theta_sd < np.nextafter(0, np.inf))
             self.theta_sd = np.sqrt(self.theta_sd)
@@ -727,7 +709,7 @@ def _test(self):
             self.theta_sd = np.sqrt(self.theta_sd)
             return stats.wald_test_chisq(
                 theta_mle=self.theta_mle,
-                theta_covar=self.model_estim.fisher_inv[:, self.sd_loc_totest, self.sd_loc_totest],
+                theta_covar=self.model_estim.fisher_inv[:, self.coef_loc_totest, self.coef_loc_totest],
                 theta0=0
             )
 
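
For reference, these coefficient and covariance slices feed a standard Wald test: a single coefficient is compared against a normal distribution via theta_mle / theta_sd, several coefficients via a chi-squared form whose covariance block is taken from the inverse Fisher information, as the comment in the hunk above notes. A minimal numpy/scipy sketch of the chi-squared variant for one gene (an illustration, not diffxpy's stats.wald_test_chisq implementation):

import numpy as np
from scipy.stats import chi2

def wald_chisq_sketch(theta_mle, theta_covar, theta0=0.0):
    """Wald chi-squared statistic for one gene.

    theta_mle: (n_coefs,) estimated coefficients to test
    theta_covar: (n_coefs, n_coefs) covariance block from the inverse Fisher information
    """
    diff = theta_mle - theta0
    stat = diff @ np.linalg.inv(theta_covar) @ diff  # (theta - theta0)' Sigma^-1 (theta - theta0)
    pval = chi2.sf(stat, df=diff.shape[0])           # upper tail, df = number of tested coefficients
    return stat, pval

# Toy example with two tested coefficients:
stat, pval = wald_chisq_sketch(
    theta_mle=np.array([0.8, -0.3]),
    theta_covar=np.array([[0.04, 0.0], [0.0, 0.09]])
)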

@@ -2529,6 +2511,16 @@ def _fit(
         Should be "float32" for single precision or "float64" for double precision.
     :param close_session: If True, will finalize the estimator. Otherwise, return the estimator itself.
     """
+    provide_optimizers = {
+        "gd": pkg_constants.BATCHGLM_OPTIM_GD,
+        "adam": pkg_constants.BATCHGLM_OPTIM_ADAM,
+        "adagrad": pkg_constants.BATCHGLM_OPTIM_ADAGRAD,
+        "rmsprop": pkg_constants.BATCHGLM_OPTIM_RMSPROP,
+        "nr": pkg_constants.BATCHGLM_OPTIM_NEWTON,
+        "irls": pkg_constants.BATCHGLM_OPTIM_IRLS
+    }
+    termination_type = pkg_constants.BATCHGLM_TERMINATION_TYPE
+
     if isinstance(training_strategy, str) and training_strategy.lower() == 'bfgs':
         lib_size = np.zeros(data.shape[0])
         if noise_model == "nb" or noise_model == "negative_binomial":
@@ -2540,7 +2532,7 @@ def _fit(
             raise ValueError('base.test(): `noise_model="%s"` not recognized.' % noise_model)
     else:
         if noise_model == "nb" or noise_model == "negative_binomial":
-            import batchglm.api.models.nb_glm as test_model
+            import batchglm.api.models.glm_nb as test_model
 
             logger.info("Fitting model...")
             logger.debug(" * Assembling input data...")
@@ -2565,6 +2557,8 @@ def _fit(
                 init_model=init_model,
                 init_a=init_a,
                 init_b=init_b,
+                provide_optimizers=provide_optimizers,
+                termination_type=termination_type,
                 dtype=dtype,
                 **constructor_args
             )
@@ -2955,8 +2949,6 @@ def wald(
         design_scale = dmat_scale
 
     # Coefficients to test:
-    indep_coef_indices = None
-    col_indices = None
     if factor_loc_totest is not None:
         # Select coefficients to test via formula model:
         col_indices = np.concatenate([
@@ -2975,7 +2967,7 @@ def wald(
     elif coef_to_test is not None:
         # Directly select coefficients to test from design matrix (xarray):
         # Check that coefficients to test are not dependent parameters if constraints are given:
-        # TODO: design_loc is sometimes xarray and sometimes patsy when it arrives here, 
+        # TODO: design_loc is sometimes xarray and sometimes patsy when it arrives here,
         # should it not always be xarray?
         if isinstance(design_loc, patsy.design_info.DesignMatrix):
             col_indices = np.asarray([
@@ -2987,10 +2979,8 @@ def wald(
                 list(np.asarray(design_loc.coords['design_params'])).index(x)
                 for x in coef_to_test
             ])
-        if constraints_loc is not None:
-            dep_coef_indices = np.where(np.any(constraints_loc == -1, axis=0) == True)[0]
-            assert np.all([x not in dep_coef_indices for x in col_indices]), "cannot test dependent coefficient"
-            indep_coef_indices = np.where(np.any(constraints_loc == -1, axis=0) == False)[0]
+    else:
+        raise ValueError("either set factor_loc_totest or coef_to_test")
 
     ## Fit GLM:
     model = _fit(
@@ -3014,9 +3004,8 @@ def wald(
 
     ## Perform DE test:
     de_test = DifferentialExpressionTestWald(
-        model,
-        col_indices=col_indices,
-        indep_coefs=indep_coef_indices
+        model_estim=model,
+        col_indices=col_indices
    )
 
     return de_test
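
To close, a hedged end-to-end sketch of the entry point this hunk touches. factor_loc_totest and noise_model="nb" appear in the hunks above; the remaining parameter names (formula_loc, sample_description) and the summary() call are assumed from the diffxpy API of that period, and the simulated counts are purely illustrative:

import numpy as np
import pandas as pd
import diffxpy.api as de

# Simulate a small observations x genes count matrix and a two-group design.
counts = np.random.negative_binomial(n=5, p=0.3, size=(200, 100))
sample_description = pd.DataFrame({"condition": ["a"] * 100 + ["b"] * 100})

de_results = de.test.wald(
    data=counts,
    formula_loc="~ 1 + condition",          # assumed parameter name
    factor_loc_totest="condition",          # selects the coefficients to test, as in the hunk above
    sample_description=sample_description,  # assumed parameter name
    noise_model="nb"                        # negative binomial, matching the glm_nb import above
)
print(de_results.summary().head())          # summary() is assumed to return a per-gene results table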
