Skip to content

Commit 7f9cc5d

Browse files
committed
Added offset option for fepois
1 parent e4e5c08 commit 7f9cc5d

File tree

5 files changed

+68
-11
lines changed

5 files changed

+68
-11
lines changed

pyfixest/estimation/FixestMulti_.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ def _prepare_estimation(
135135
ssc: Optional[dict[str, Union[str, bool]]] = None,
136136
fixef_rm: str = "none",
137137
drop_intercept: bool = False,
138+
offset: Optional[Union[None, str]] = None,
138139
) -> None:
139140
"""
140141
Prepare model for estimation.
@@ -153,9 +154,13 @@ def _prepare_estimation(
153154
A string or dictionary specifying the type of variance-covariance
154155
matrix to use for inference.
155156
See `feols()` or `fepois()`.
156-
weights : Union[None, np.ndarray], optional
157-
An array of weights.
158-
Either None or a 1D array of length N. Default is None.
157+
weights : Union[None, str], optional
158+
Default is None. Weights for WLS estimation. If None, all observations
159+
are weighted equally. If a string, the name of the column in `data` that
160+
contains the weights.
161+
offset : Union[None, str], optional
162+
Default is None. Offset variable for Poisson regression, added to the linear predictor with its coefficient fixed at 1. If None, no offset is applied.
163+
If a string, the name of the column in `data` that contains the offset.
159164
ssc : dict[str, str], optional
160165
A dictionary specifying the type of standard errors to use for inference.
161166
See `feols()` or `fepois()`.
@@ -179,6 +184,7 @@ def _prepare_estimation(
179184
self._is_multiple_estimation = False
180185
self._drop_intercept = False
181186
self._weights = weights
187+
self._offset = offset
182188
self._has_weights = False
183189
if weights is not None:
184190
self._has_weights = True
@@ -247,6 +253,7 @@ def _estimate_all_models(
247253
_ssc_dict = self._ssc_dict
248254
_drop_intercept = self._drop_intercept
249255
_weights = self._weights
256+
_offset = self._offset
250257
_fixef_tol = self._fixef_tol
251258
_weights_type = self._weights_type
252259
_lean = self._lean
@@ -339,6 +346,7 @@ def _estimate_all_models(
339346
drop_intercept=_drop_intercept,
340347
weights=_weights,
341348
weights_type=_weights_type,
349+
offset=_offset,
342350
solver=solver,
343351
demeaner_backend=demeaner_backend,
344352
collin_tol=collin_tol,

pyfixest/estimation/estimation.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from collections.abc import Mapping
2-
from typing import Any, Optional, Union
2+
from typing import Any, Optional, Union, Sequence
33

44
import pandas as pd
55

@@ -503,6 +503,7 @@ def fepois(
503503
fml: str,
504504
data: DataFrameType, # type: ignore
505505
vcov: Optional[Union[VcovTypeOptions, dict[str, str]]] = None,
506+
offset: Union[None, str] = None,
506507
ssc: Optional[dict[str, Union[str, bool]]] = None,
507508
fixef_rm: FixedRmOptions = "none",
508509
fixef_tol: float = 1e-08,
@@ -545,6 +546,10 @@ def fepois(
545546
Type of variance-covariance matrix for inference. Options include "iid",
546547
"hetero", "HC1", "HC2", "HC3", or a dictionary for CRV1/CRV3 inference.
547548
549+
offset : Union[None, str], optional
550+
Default is None. Offset variable for Poisson regression, added to the linear predictor with its coefficient fixed at 1. If None, no offset is applied.
551+
If a string, the name of the column in `data` that contains the offset.
552+
548553
ssc : dict[str, Union[str, bool]], optional
549554
A ssc object specifying the small sample correction for inference.
550555
@@ -670,6 +675,7 @@ def fepois(
670675
data=data,
671676
vcov=vcov,
672677
weights=weights,
678+
offset=offset,
673679
ssc=ssc,
674680
fixef_rm=fixef_rm,
675681
collin_tol=collin_tol,
@@ -702,7 +708,7 @@ def fepois(
702708
)
703709

704710
fixest._prepare_estimation(
705-
"fepois", fml, vcov, weights, ssc, fixef_rm, drop_intercept
711+
"fepois", fml, vcov, weights, ssc, fixef_rm, drop_intercept, offset=offset
706712
)
707713
if fixest._is_iv:
708714
raise NotImplementedError(
@@ -1001,6 +1007,7 @@ def _estimation_input_checks(
10011007
split: Optional[str],
10021008
fsplit: Optional[str],
10031009
separation_check: Optional[list[str]] = None,
1010+
offset: Optional[Union[None, str]] = None,
10041011
):
10051012
if not isinstance(fml, str):
10061013
raise TypeError("fml must be a string")
@@ -1027,6 +1034,9 @@ def _estimation_input_checks(
10271034
if weights is not None:
10281035
assert weights in data.columns, "weights must be a column in data"
10291036

1037+
if offset is not None:
1038+
assert offset in data.columns, "offset must be a column in data"
1039+
10301040
bool_args = [copy_data, store_data, lean]
10311041
for arg in bool_args:
10321042
if not isinstance(arg, bool):

pyfixest/estimation/feols_.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def __init__(
229229
context: Union[int, Mapping[str, Any]] = 0,
230230
sample_split_var: Optional[str] = None,
231231
sample_split_value: Optional[Union[str, int, float]] = None,
232+
offset: Optional[str] = None,
232233
) -> None:
233234
self._sample_split_value = sample_split_value
234235
self._sample_split_var = sample_split_var
@@ -250,6 +251,7 @@ def __init__(
250251
self._drop_intercept = drop_intercept
251252
self._weights_name = weights
252253
self._weights_type = weights_type
254+
self._offset_name = offset
253255
self._has_weights = weights is not None
254256
self._collin_tol = collin_tol
255257
self._fixef_tol = fixef_tol
@@ -347,6 +349,7 @@ def prepare_model_matrix(self):
347349
drop_singletons=self._drop_singletons,
348350
drop_intercept=self._drop_intercept,
349351
weights=self._weights_name,
352+
offset=self._offset_name,
350353
context=self._context,
351354
)
352355

@@ -357,6 +360,7 @@ def prepare_model_matrix(self):
357360
self._endogvar = mm_dict.get("endogvar")
358361
self._Z = mm_dict.get("Z")
359362
self._weights_df = mm_dict.get("weights_df")
363+
self._offset_df = mm_dict.get("offset_df")
360364
self._na_index = mm_dict.get("na_index")
361365
self._na_index_str = mm_dict.get("na_index_str")
362366
self._icovars = mm_dict.get("icovars")
@@ -372,8 +376,10 @@ def prepare_model_matrix(self):
372376
self._data = _drop_cols(self._data, self._na_index)
373377

374378
self._weights = self._set_weights()
379+
self._offset = self._set_offset()
375380
self._N, self._N_rows = self._set_nobs()
376381

382+
377383
def _set_nobs(self) -> tuple[int, int]:
378384
"""
379385
Fetch the number of observations used in fitting the regression model.
@@ -411,6 +417,27 @@ def _set_weights(self) -> np.ndarray:
411417
_weights = np.ones(N)
412418

413419
return _weights.reshape((N, 1))
420+
421+
def _set_offset(self) -> np.ndarray:
422+
"""
423+
Return the offset used in the regression model.
424+
425+
Returns
426+
-------
427+
np.ndarray
428+
The offset used in the regression model, as an array of shape (N, 1).
429+
If no offset is used, returns an array of zeros
430+
with the same length as the dependent variable array.
431+
"""
432+
433+
N = len(self._Y)
434+
435+
if self._offset_df is not None:
436+
_offset = self._offset_df.to_numpy()
437+
else:
438+
_offset = np.zeros(N)
439+
440+
return _offset.reshape((N, 1))
414441

415442
def demean(self):
416443
"Demean the dependent variable and covariates by the fixed effect(s)."

pyfixest/estimation/fepois_.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def __init__(
8080
drop_intercept: bool,
8181
weights: Optional[str],
8282
weights_type: Optional[str],
83+
offset: Optional[float],
8384
collin_tol: float,
8485
fixef_tol: float,
8586
lookup_demeaned_data: dict[str, pd.DataFrame],
@@ -105,6 +106,7 @@ def __init__(
105106
drop_intercept=drop_intercept,
106107
weights=weights,
107108
weights_type=weights_type,
109+
offset=offset,
108110
collin_tol=collin_tol,
109111
fixef_tol=fixef_tol,
110112
lookup_demeaned_data=lookup_demeaned_data,
@@ -147,7 +149,7 @@ def prepare_model_matrix(self):
147149
raise ValueError(
148150
"The dependent variable must be a weakly positive integer."
149151
)
150-
152+
151153
# check for separation
152154
na_separation: list[int] = []
153155
if (
@@ -168,6 +170,7 @@ def prepare_model_matrix(self):
168170
self._Y.drop(na_separation, axis=0, inplace=True)
169171
self._X.drop(na_separation, axis=0, inplace=True)
170172
self._fe.drop(na_separation, axis=0, inplace=True)
173+
self._offset = np.delete(self._offset,na_separation,axis=0) # _offset is a numpy array so we use delete instead of drop
171174
self._data.drop(na_separation, axis=0, inplace=True)
172175
self._N = self._Y.shape[0]
173176

@@ -217,6 +220,7 @@ def get_fit(self) -> None:
217220
_Y = self._Y
218221
_X = self._X
219222
_fe = self._fe
223+
_offset = self._offset
220224
_N = self._N
221225
_convergence = self.convergence # False
222226
_maxiter = self.maxiter
@@ -251,13 +255,13 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
251255
_mean = np.mean(_Y)
252256
mu = (_Y + _mean) / 2
253257
eta = np.log(mu)
254-
Z = eta + _Y / mu - 1
258+
Z = eta - _offset + _Y / mu - 1
255259
reg_Z = Z.copy()
256260
last = compute_deviance(_Y, mu)
257261

258262
else:
259263
# update w and Z
260-
Z = eta + _Y / mu - 1 # eq (8)
264+
Z = eta - _offset + _Y / mu - 1 # eq (8)
261265
reg_Z = Z.copy() # eq (9)
262266

263267
# tighten HDFE tolerance - currently not possible with PyHDFE
@@ -294,7 +298,7 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
294298

295299
mu_old = mu.copy()
296300
# more updating
297-
eta = Z - resid
301+
eta = Z - resid + _offset
298302
mu = np.exp(eta)
299303

300304
# same criterion as fixest
@@ -695,3 +699,4 @@ def _fepois_input_checks(drop_singletons: bool, tol: float, maxiter: int):
695699
raise TypeError("maxiter must be integer.")
696700
if maxiter <= 0:
697701
raise AssertionError("maxiter must be greater than 0.")
702+

pyfixest/estimation/model_matrix_fixest_.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def model_matrix_fixest(
1717
data: pd.DataFrame,
1818
drop_singletons: bool = False,
1919
weights: Optional[str] = None,
20+
offset: Optional[str] = None,
2021
drop_intercept=False,
2122
context: Union[int, Mapping[str, Any]] = 0,
2223
) -> dict:
@@ -123,14 +124,15 @@ def model_matrix_fixest(
123124
**({"fml_first_stage": fml_first_stage} if _is_iv else {}),
124125
**({"fe": wrap_factorize(fval)} if fval != "0" else {}),
125126
**({"weights": weights} if weights is not None else {}),
127+
**({"offset": offset} if offset is not None else {}),
126128
}
127129

128130
FML = Formula(**fml_kwargs)
129131
_context = capture_context(context)
130132
mm = FML.get_model_matrix(
131133
data, output="pandas", context={"factorize": factorize, **_context}
132134
)
133-
endogvar = Z = weights_df = fe = None
135+
endogvar = Z = weights_df = offset_df = fe = None
134136

135137
Y = mm["fml_second_stage"]["lhs"]
136138
X = mm["fml_second_stage"]["rhs"]
@@ -142,8 +144,10 @@ def model_matrix_fixest(
142144
fe = mm["fe"]
143145
if weights is not None:
144146
weights_df = mm["weights"]
147+
if offset is not None:
148+
offset_df = mm["offset"]
145149

146-
for df in [Y, X, Z, endogvar, weights_df]:
150+
for df in [Y, X, Z, endogvar, weights_df,offset_df]:
147151
if df is not None:
148152
cols_to_convert = df.select_dtypes(exclude=["int64", "float64"]).columns
149153
if cols_to_convert.size > 0:
@@ -196,6 +200,8 @@ def model_matrix_fixest(
196200
endogvar = endogvar[keep_idx]
197201
if weights_df is not None:
198202
weights_df = weights_df[keep_idx]
203+
if offset is not None:
204+
offset_df = offset_df[keep_idx]
199205

200206
na_index = _get_na_index(data.shape[0], Y.index)
201207
na_index_str = ",".join(str(x) for x in na_index)
@@ -213,6 +219,7 @@ def model_matrix_fixest(
213219
"endogvar": endogvar,
214220
"Z": Z,
215221
"weights_df": weights_df,
222+
"offset_df": offset_df,
216223
"na_index": na_index,
217224
"na_index_str": na_index_str,
218225
"icovars": _icovars,

0 commit comments

Comments
 (0)