Skip to content

Commit 98c4ab0

Browse files
committed
fix(eda): remove unnecessary compute
1 parent bad6a87 commit 98c4ab0

File tree

3 files changed

+22
-10
lines changed

3 files changed

+22
-10
lines changed

dataprep/eda/missing/compute/bivariate.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
from .common import LABELS, histogram
2121

2222

23-
@staged
24-
def compute_missing_bivariate( # pylint: disable=too-many-locals
23+
def _compute_missing_bivariate( # pylint: disable=too-many-locals
2524
df: DataArray,
2625
x: str,
2726
y: str,
@@ -32,7 +31,6 @@ def compute_missing_bivariate( # pylint: disable=too-many-locals
3231
# pylint: disable=too-many-arguments
3332
"""Calculate the distribution change on another column y when
3433
the missing values in x is dropped."""
35-
df.compute("nulls")
3634

3735
xloc = df.columns.get_loc(x)
3836
yloc = df.columns.get_loc(y)
@@ -144,3 +142,9 @@ def compute_missing_bivariate( # pylint: disable=too-many-locals
144142
hist=df_ret, x=x, y=y, meta=meta["y"], visual_type="missing_impact_1v1",
145143
)
146144
return itmdt
145+
146+
147+
# Not using decorator here because jupyter autoreload does not support it.
148+
compute_missing_bivariate = staged( # pylint: disable=invalid-name
149+
_compute_missing_bivariate
150+
)

dataprep/eda/missing/compute/nullivariate.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
from ...staged import staged
1616

1717

18-
@staged
19-
def compute_missing_nullivariate(
18+
def _compute_missing_nullivariate(
2019
df: DataArray, bins: int
2120
) -> Generator[Any, Any, Intermediate]:
2221
"""Calculate the data for visualizing the plot_missing(df).
@@ -56,6 +55,12 @@ def compute_missing_nullivariate(
5655
)
5756

5857

58+
# Not using decorator here because jupyter autoreload does not support it.
59+
compute_missing_nullivariate = staged( # pylint: disable=invalid-name
60+
_compute_missing_nullivariate
61+
)
62+
63+
5964
def missing_perc_blockwise(bin_size: int) -> Callable[[np.ndarray], np.ndarray]:
6065
"""Compute the missing percentage in a block."""
6166

dataprep/eda/missing/compute/univariate.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,11 @@
1919
from .common import LABELS, histogram
2020

2121

22-
@staged
23-
def compute_missing_univariate( # pylint: disable=too-many-locals
22+
def _compute_missing_univariate( # pylint: disable=too-many-locals
2423
df: DataArray, x: str, bins: int, dtype: Optional[DTypeDef] = None,
2524
) -> Generator[Any, Any, Intermediate]:
2625
"""Calculate the distribution change on other columns when
2726
the missing values in x is dropped."""
28-
df.compute("nulls")
29-
3027
j = df.columns.get_loc(x)
3128

3229
hists = {}
@@ -46,7 +43,7 @@ def compute_missing_univariate( # pylint: disable=too-many-locals
4643
hist_range = (col0.min(axis=0), col0.max(axis=0))
4744

4845
hists[col_name] = [
49-
histogram(col, dtype=dtype, bins=bins, return_edges=True, range=hist_range,)
46+
histogram(col, dtype=dtype, bins=bins, return_edges=True, range=hist_range)
5047
for col in [col0, col1]
5148
]
5249

@@ -97,3 +94,9 @@ def compute_missing_univariate( # pylint: disable=too-many-locals
9794
dfs[col_name] = ret_df
9895

9996
return Intermediate(data=dfs, x=x, meta=meta, visual_type="missing_impact_1vn")
97+
98+
99+
# Not using decorator here because jupyter autoreload does not support it.
100+
compute_missing_univariate = staged( # pylint: disable=invalid-name
101+
_compute_missing_univariate
102+
)

0 commit comments

Comments
 (0)