 import pandas as pd

 import numpy as np
-# import scipy.sparse
-
-# import dask
 import xarray as xr

 try:

 import patsy
 import batchglm.data as data_utils
-from batchglm.api.models.glm import Model as GeneralizedLinearModel
+from batchglm.api.models.glm_nb import Model as GeneralizedLinearModel

 from ..stats import stats
 from . import correction
 from ..models.batch_bfgs.optim import Estim_BFGS
+from diffxpy import pkg_constants

 logger = logging.getLogger(__name__)

@@ -603,9 +601,7 @@ class DifferentialExpressionTestWald(_DifferentialExpressionTestSingle):
603601 """
604602
605603 model_estim : _Estimation
606- sd_loc_totest : np .ndarray
607604 coef_loc_totest : np .ndarray
608- indep_coefs : np .ndarray
609605 theta_mle : np .ndarray
610606 theta_sd : np .ndarray
611607 _error_codes : np .ndarray
@@ -614,30 +610,16 @@ class DifferentialExpressionTestWald(_DifferentialExpressionTestSingle):
     def __init__(
             self,
             model_estim: _Estimation,
-            col_indices: np.ndarray,
-            indep_coefs: np.ndarray = None
+            col_indices: np.ndarray
     ):
         """
         :param model_estim:
-        :param cold_index: indices of indep_coefs to test
-        :param indep_coefs: indices of independent coefficients in coefficient vector
+        :param col_indices: indices of the coefficients to test
624618 """
625619 super ().__init__ ()
626620
627621 self .model_estim = model_estim
628622 self .coef_loc_totest = col_indices
629- # Note that self.indep_coefs are relevant if constraints are given
630- # and hessian is computed across independent coefficients only
631- # whereas point estimators are given for all coefficients.
632- if indep_coefs is not None :
633- self .indep_coefs = indep_coefs
634- self .sd_loc_totest = np .where ([x in col_indices for x in self .indep_coefs ])[0 ]
635- else :
636- self .indep_coefs = None
637- self .sd_loc_totest = self .coef_loc_totest
638-
639- # p = self.pval
640- # q = self.qval
641623
642624 try :
643625 if model_estim ._error_codes is not None :
@@ -689,15 +671,15 @@ def _ll(self):

         :return: xr.DataArray
         """
-        return np.sum(self.model_estim.log_probs(), axis=0)
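+        # Sum the per-observation log-likelihood contributions over axis 0 so that
+        # one log-likelihood value per gene remains (per the docstring above).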
+        return np.sum(self.model_estim.log_likelihood, axis=0)

     def _ave(self):
         """
         Returns a xr.DataArray containing the mean expression by gene

         :return: xr.DataArray
         """
-        return np.mean(self.model_estim.X, axis=0)
+        return np.mean(self.X, axis=0)

     def _test(self):
         """
@@ -710,8 +692,8 @@ def _test(self):
         # with a normal distribution, for multiple parameters, a chi-square distribution is used.
         self.theta_mle = self.model_estim.par_link_loc[self.coef_loc_totest]
         if len(self.coef_loc_totest) == 1:
-            self.theta_mle = self.theta_mle[0]  # Make xarray one dimensinoal for stats.wald_test.
-            self.theta_sd = self.model_estim.fisher_inv[:, self.sd_loc_totest[0], self.sd_loc_totest[0]].values
+            self.theta_mle = self.theta_mle[0]  # Make xarray one dimensional for stats.wald_test.
+            self.theta_sd = self.model_estim.fisher_inv[:, self.coef_loc_totest[0], self.coef_loc_totest[0]].values
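+            # Clip the variance estimates to the smallest positive float before taking the
+            # square root, guarding against zero entries in the inverse Fisher information.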
             self.theta_sd = np.nextafter(0, np.inf, out=self.theta_sd,
                                          where=self.theta_sd < np.nextafter(0, np.inf))
             self.theta_sd = np.sqrt(self.theta_sd)
@@ -727,7 +709,7 @@ def _test(self):
             self.theta_sd = np.sqrt(self.theta_sd)
             return stats.wald_test_chisq(
                 theta_mle=self.theta_mle,
-                theta_covar=self.model_estim.fisher_inv[:, self.sd_loc_totest, self.sd_loc_totest],
+                theta_covar=self.model_estim.fisher_inv[:, self.coef_loc_totest, self.coef_loc_totest],
                 theta0=0
             )

@@ -2529,6 +2511,16 @@ def _fit(
25292511 Should be "float32" for single precision or "float64" for double precision.
25302512 :param close_session: If True, will finalize the estimator. Otherwise, return the estimator itself.
25312513 """
+    provide_optimizers = {
+        "gd": pkg_constants.BATCHGLM_OPTIM_GD,
+        "adam": pkg_constants.BATCHGLM_OPTIM_ADAM,
+        "adagrad": pkg_constants.BATCHGLM_OPTIM_ADAGRAD,
+        "rmsprop": pkg_constants.BATCHGLM_OPTIM_RMSPROP,
+        "nr": pkg_constants.BATCHGLM_OPTIM_NEWTON,
+        "irls": pkg_constants.BATCHGLM_OPTIM_IRLS
+    }
+    termination_type = pkg_constants.BATCHGLM_TERMINATION_TYPE
+
     if isinstance(training_strategy, str) and training_strategy.lower() == 'bfgs':
         lib_size = np.zeros(data.shape[0])
         if noise_model == "nb" or noise_model == "negative_binomial":
@@ -2540,7 +2532,7 @@ def _fit(
             raise ValueError('base.test(): `noise_model="%s"` not recognized.' % noise_model)
     else:
         if noise_model == "nb" or noise_model == "negative_binomial":
-            import batchglm.api.models.nb_glm as test_model
+            import batchglm.api.models.glm_nb as test_model

             logger.info("Fitting model...")
             logger.debug(" * Assembling input data...")
@@ -2565,6 +2557,8 @@ def _fit(
                 init_model=init_model,
                 init_a=init_a,
                 init_b=init_b,
+                provide_optimizers=provide_optimizers,
+                termination_type=termination_type,
                 dtype=dtype,
                 **constructor_args
             )
@@ -2955,8 +2949,6 @@ def wald(
         design_scale = dmat_scale

     # Coefficients to test:
-    indep_coef_indices = None
-    col_indices = None
     if factor_loc_totest is not None:
         # Select coefficients to test via formula model:
         col_indices = np.concatenate([
@@ -2975,7 +2967,7 @@ def wald(
     elif coef_to_test is not None:
         # Directly select coefficients to test from design matrix (xarray):
         # Check that coefficients to test are not dependent parameters if constraints are given:
-        # TODO: design_loc is sometimes xarray and sometimes patsy when it arrives here,
+        # TODO: design_loc is sometimes xarray and sometimes patsy when it arrives here,
         # should it not always be xarray?
         if isinstance(design_loc, patsy.design_info.DesignMatrix):
             col_indices = np.asarray([
@@ -2987,10 +2979,8 @@ def wald(
                 list(np.asarray(design_loc.coords['design_params'])).index(x)
                 for x in coef_to_test
             ])
-        if constraints_loc is not None:
-            dep_coef_indices = np.where(np.any(constraints_loc == -1, axis=0) == True)[0]
-            assert np.all([x not in dep_coef_indices for x in col_indices]), "cannot test dependent coefficient"
-            indep_coef_indices = np.where(np.any(constraints_loc == -1, axis=0) == False)[0]
+    else:
+        raise ValueError("either set factor_loc_totest or coef_to_test")

     ## Fit GLM:
     model = _fit(
@@ -3014,9 +3004,8 @@ def wald(

     ## Perform DE test:
     de_test = DifferentialExpressionTestWald(
-        model,
-        col_indices=col_indices,
-        indep_coefs=indep_coef_indices
+        model_estim=model,
+        col_indices=col_indices
     )

     return de_test