Skip to content

Commit bad6a87

Browse files
committed
feat(eda): support series as the input
1 parent aeb1295 commit bad6a87

File tree

3 files changed

+9
-1
lines changed

3 files changed

+9
-1
lines changed

dataprep/eda/data_array.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ class DataArray:
     def __init__(
         self, df: DataFrame, value_length: bool = False, repartition: bool = True,
     ) -> None:
+        if isinstance(df, (dd.Series, pd.Series)):
+            df = df.to_frame()
+
         # numpy does not understand pandas types
         cat_cols = [
             col

dataprep/eda/distribution/compute/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ def compute(
         or dtype = Continuous() or dtype = "Continuous" or dtype = Continuous()
     """  # pylint: disable=too-many-locals

-    df.columns = df.columns.astype(str)
     df = to_dask(df)
+    df.columns = df.columns.astype(str)

     if not any((x, y, z)):
         return compute_overview(df, bins, ngroups, largest, timeunit, dtype)

dataprep/eda/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ def to_dask(df: Union[pd.DataFrame, dd.DataFrame]) -> dd.DataFrame:
     """Convert a dataframe to a dask dataframe."""
     if isinstance(df, dd.DataFrame):
         return df
+    elif isinstance(df, dd.Series):
+        return df.to_frame()
+
+    if isinstance(df, pd.Series):
+        df = df.to_frame()

     df_size = df.memory_usage(deep=True).sum()
     npartitions = ceil(df_size / 128 / 1024 / 1024)  # 128 MB partition size

0 commit comments

Comments
 (0)