Commit 566506d

Use adaptor in save_data
1 parent a6fc8fb · commit 566506d

File tree: 4 files changed (+205, -95 lines)


pins/adaptors.py

Lines changed: 86 additions & 15 deletions
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, overload
 
@@ -25,6 +26,47 @@ class _Adaptor:
     def __init__(self, data: Any) -> None:
         self._d = data
 
+    @overload
+    def write_json(self, file: str) -> None: ...
+    @overload
+    def write_json(self, file: None) -> str: ...
+    def write_json(self, file=None):
+        if file is None:
+            msg = (
+                f"Writing to JSON string rather than file is not supported for "
+                f"{type(self._d)}"
+            )
+            raise NotImplementedError(msg)
+
+        import json
+
+        json.dump(self._d, open(file, mode="w"))
+
+    def write_joblib(self, file: str) -> None:
+        import joblib
+
+        joblib.dump(self._d, file)
+
+    def write_csv(self, file: str) -> None:
+        msg = f"Writing to CSV is not supported for {type(self._d)}"
+        raise NotImplementedError(msg)
+
+    def write_parquet(self, file: str) -> None:
+        msg = f"Writing to Parquet is not supported for {type(self._d)}"
+        raise NotImplementedError(msg)
+
+    def write_feather(self, file: str) -> None:
+        msg = f"Writing to Feather is not supported for {type(self._d)}"
+        raise NotImplementedError(msg)
+
+    @property
+    def data_preview(self) -> str:
+        # note that the R library uses jsonlite::toJSON
+        import json
+
+        # TODO(compat): set display none in index.html
+        return json.dumps({})
+
 
 class _DFAdaptor(_Adaptor):
     _d: ClassVar[_DataFrame]
@@ -39,12 +81,20 @@ def columns(self) -> list[Any]: ...
     @abstractmethod
     def head(self, n: int) -> Self: ...
 
-    @abstractmethod
-    def write_json(self) -> str:
-        """Write the dataframe to a JSON string.
+    @property
+    def data_preview(self) -> str:
+        # TODO(compat) is 100 hard-coded?
+        # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library
+        data: list[dict[Any, Any]] = json.loads(self.head(100).write_json())
+        columns = [
+            {"name": [col], "label": [col], "align": ["left"], "type": [""]}
+            for col in self.columns
+        ]
 
-        In the format: list like [{column -> value}, ... , {column -> value}]
-        """
+        # this reproduces R pins behavior, by omitting entries that would be null
+        data_no_nulls = [{k: v for k, v in row.items() if v is not None} for row in data]
+
+        return json.dumps({"data": data_no_nulls, "columns": columns})
 
 
 class _PandasAdaptor(_DFAdaptor):
@@ -53,22 +103,43 @@ def __init__(self, data: _AbstractPandasFrame) -> None:
 
     @property
     def columns(self) -> list[Any]:
-        return self._d.columns
+        return self._d.columns.tolist()
 
     def head(self, n: int) -> Self:
         return _PandasAdaptor(self._d.head(n))
 
-    def write_json(self) -> str:
+    @overload
+    def write_json(self, file: str) -> None: ...
+    @overload
+    def write_json(self, file: None) -> str: ...
+    def write_json(self, file=None):
+        if file is not None:
+            msg = (
+                f"Writing to file rather than JSON string is not supported for "
+                f"{type(self._d)}"
+            )
+            raise NotImplementedError(msg)
+
         return self._d.to_json(orient="records")
 
+    def write_csv(self, file: str) -> None:
+        self._d.to_csv(file, index=False)
+
+    def write_parquet(self, file: str) -> None:
+        self._d.to_parquet(file)
+
+    def write_feather(self, file: str) -> None:
+        self._d.to_feather(file)
+
 
 @overload
-def _create_df_adaptor(df: _DataFrame) -> _DFAdaptor: ...
+def _create_adaptor(obj: Any) -> _Adaptor: ...
 @overload
-def _create_df_adaptor(df: _PandasDataFrame) -> _PandasAdaptor: ...
-def _create_df_adaptor(df):
-    if isinstance(df, _AbstractPandasFrame):
-        return _PandasAdaptor(df)
-
-    msg = f"Could not determine dataframe adaptor for {df}"
-    raise NotImplementedError(msg)
+def _create_adaptor(obj: _DataFrame) -> _DFAdaptor: ...
+@overload
+def _create_adaptor(obj: _PandasDataFrame) -> _PandasAdaptor: ...
+def _create_adaptor(obj):
+    if isinstance(obj, _AbstractPandasFrame):
+        return _PandasAdaptor(obj)
+    else:
+        return _Adaptor(obj)
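
After this change, the renamed `_create_adaptor` always succeeds: pandas dataframes get a `_PandasAdaptor`, and everything else falls back to the base `_Adaptor`, whose writers raise `NotImplementedError` for formats they cannot produce. A minimal usage sketch against the internals added in this diff (these are private names, so the `pins.adaptors` import path is an assumption and may change):

    import pandas as pd

    from pins.adaptors import _Adaptor, _PandasAdaptor, _create_adaptor

    df_adaptor = _create_adaptor(pd.DataFrame({"x": [1, 2]}))
    assert isinstance(df_adaptor, _PandasAdaptor)

    # Non-dataframe objects no longer raise NotImplementedError at dispatch;
    # they receive the base adaptor instead.
    obj_adaptor = _create_adaptor({"a": 1})
    assert type(obj_adaptor) is _Adaptor

    # The base adaptor rejects formats it cannot write...
    try:
        obj_adaptor.write_csv("out.csv")
    except NotImplementedError as e:
        print(e)  # Writing to CSV is not supported for <class 'dict'>

    # ...while the pandas adaptor delegates to the underlying frame.
    df_adaptor.write_csv("out.csv")

This moves the "is this a dataframe?" decision out of callers and into the type hierarchy: callers just invoke the writer they need and let the adaptor object raise when the combination is unsupported.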

pins/boards.py

Lines changed: 5 additions & 36 deletions
@@ -7,11 +7,11 @@
 from datetime import datetime, timedelta
 from io import IOBase
 from pathlib import Path
-from typing import Any, Mapping, Optional, Protocol, Sequence
+from typing import Mapping, Optional, Protocol, Sequence
 
 from importlib_resources import files
 
-from .adaptors import _create_df_adaptor, _DFAdaptor
+from .adaptors import _create_adaptor
 from .cache import PinsCache
 from .config import get_allow_rsc_short_name
 from .drivers import default_title, load_data, load_file, save_data
@@ -1124,11 +1124,7 @@ def user_name(self):
 
     # TODO(NAMC) what about the functions that call this one?
     def prepare_pin_version(self, pin_dir_path, x, name: "str | None", *args, **kwargs):
-        try:
-            x = _create_df_adaptor(x)
-        except NotImplementedError:
-            # Not a dataframe.
-            pass
+        adaptor = _create_adaptor(x)
 
         # RSC pin names can have form <user_name>/<name>, but this will try to
         # create the object in a directory named <user_name>. So we grab just
@@ -1138,7 +1134,7 @@ def prepare_pin_version(self, pin_dir_path, x, name: "str | None", *args, **kwar
         # TODO(compat): py pins always uses the short name, R pins uses w/e the
         # user passed, but guessing people want the long name?
         meta = super()._create_meta(
-            pin_dir_path, x, short_name, *args, **kwargs
+            pin_dir_path, adaptor, short_name, *args, **kwargs
         )  # TODO(NAMC) ensure .create_meta can accept adaptor
         meta.name = name
 
@@ -1165,36 +1161,9 @@ def prepare_pin_version(self, pin_dir_path, x, name: "str | None", *args, **kwar
             "pin_files": pin_files,
             "pin_metadata": meta,
             "board_deparse": board_deparse(self),
+            "data_preview": adaptor.data_preview,
         }
 
-        # data preview ----
-
-        # TODO: move out data_preview logic? Can we draw some limits here?
-        # note that the R library uses jsonlite::toJSON
-
-        import json
-
-        if isinstance(x, _DFAdaptor):
-            # TODO(compat) is 100 hard-coded?
-            # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library
-            data: list[dict[Any, Any]] = json.loads(x.head(100).write_json())
-            columns = [
-                {"name": [col], "label": [col], "align": ["left"], "type": [""]}
-                for col in x.columns
-            ]
-
-            # this reproduces R pins behavior, by omitting entries that would be null
-            data_no_nulls = [
-                {k: v for k, v in row.items() if v is not None} for row in data
-            ]
-
-            context["data_preview"] = json.dumps(
-                {"data": data_no_nulls, "columns": columns}
-            )
-        else:
-            # TODO(compat): set display none in index.html
-            context["data_preview"] = json.dumps({})
-
         # do not show r code if not round-trip friendly
         if meta.type in ["joblib"]:
             context["show_r_style"] = "display:none"

pins/drivers.py

Lines changed: 11 additions & 34 deletions
@@ -1,6 +1,8 @@
 from pathlib import Path
 from typing import Sequence
 
+from pins.adaptors import _create_adaptor
+
 from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read
 from .errors import PinsInsecureReadError
 from .meta import Meta
@@ -13,15 +15,6 @@
 REQUIRES_SINGLE_FILE = frozenset(["csv", "joblib", "file"])
 
 
-def _assert_is_pandas_df(x, file_type: str) -> None:
-    import pandas as pd
-
-    if not isinstance(x, pd.DataFrame):
-        raise NotImplementedError(
-            f"Currently only pandas.DataFrame can be saved as type {file_type!r}."
-        )
-
-
 def load_path(meta, path_to_version):
     # Check that only a single file name was given
     fnames = [meta.file] if isinstance(meta.file, str) else meta.file
@@ -141,6 +134,8 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen
     # as argument to board, and then type dispatchers for explicit cases
     # of saving / loading objects different ways.
 
+    adaptor = _create_adaptor(obj)
+
     if apply_suffix:
         if type == "file":
             suffix = "".join(Path(obj).suffixes)
@@ -152,47 +147,29 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen
     final_name = f"{fname}{suffix}"
 
     if type == "csv":
-        _assert_is_pandas_df(obj, file_type=type)
-
-        obj.to_csv(final_name, index=False)
-
+        adaptor.write_csv(final_name)
     elif type == "arrow":
         # NOTE: R pins accepts the type arrow, and saves it as feather.
         # we allow reading this type, but raise an error for writing.
-        _assert_is_pandas_df(obj, file_type=type)
-
-        obj.to_feather(final_name)
-
+        adaptor.write_feather(final_name)
     elif type == "feather":
-        _assert_is_pandas_df(obj, file_type=type)
-
-        raise NotImplementedError(
+        msg = (
             'Saving data as type "feather" no longer supported. Use type "arrow" instead.'
         )
-
+        raise NotImplementedError(msg)
     elif type == "parquet":
-        _assert_is_pandas_df(obj, file_type=type)
-
-        obj.to_parquet(final_name)
-
+        adaptor.write_parquet(final_name)
    elif type == "joblib":
-        import joblib
-
-        joblib.dump(obj, final_name)
-
+        adaptor.write_joblib(final_name)
     elif type == "json":
-        import json
-
-        json.dump(obj, open(final_name, "w"))
-
+        adaptor.write_json(final_name)
     elif type == "file":
         import contextlib
         import shutil
 
         # ignore the case where the source is the same as the target
         with contextlib.suppress(shutil.SameFileError):
             shutil.copyfile(str(obj), final_name)
-
     else:
         raise NotImplementedError(f"Cannot save type: {type}")
 
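
With the adaptor built once at the top of `save_data`, every per-type branch reduces to a one-line delegation, and the removed `_assert_is_pandas_df` check is replaced by the `NotImplementedError` raised inside the base `_Adaptor` writers. A hypothetical call pattern (output file names assume `apply_suffix` appends the type's extension, which the surrounding code suggests but these hunks do not show):

    import pandas as pd

    from pins.drivers import save_data

    # A dataframe can be written to any of the tabular formats.
    save_data(pd.DataFrame({"x": [1, 2]}), "./my_pin", type="csv")

    # A plain object still works for json/joblib...
    save_data({"a": 1}, "./my_pin", type="json")

    # ...but tabular types now fail inside _Adaptor.write_csv,
    # replacing the old pandas-only assertion.
    save_data({"a": 1}, "./my_pin", type="csv")  # raises NotImplementedError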
