Skip to content

Commit 29b1268

Browse files
gustavocidornelas authored and whoseoyster committed
Add default columnNames for batches of data, tar dataset and config for batches of data, fix create inference pipeline bug, and fix monitoring notebook config
1 parent f48ed41 commit 29b1268

File tree

2 files changed

+30
-9
lines changed

2 files changed

+30
-9
lines changed

examples/monitoring/quickstart/monitoring-quickstart.ipynb

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@
5252
"metadata": {},
5353
"outputs": [],
5454
"source": [
55-
"import openlayer\n",
5655
"from openlayer.tasks import TaskType\n",
56+
"import openlayer\n",
5757
"\n",
5858
"client = openlayer.OpenlayerClient(\"YOUR_API_KEY_HERE\")\n",
5959
"project = client.create_or_load_project(\n",
@@ -83,6 +83,16 @@
8383
"# inference_pipeline = project.load_inference_pipeline(name=\"Production\")"
8484
]
8585
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"id": "61e916c2",
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
93+
"inference_pipeline"
94+
]
95+
},
8696
{
8797
"cell_type": "markdown",
8898
"id": "39592b32",
@@ -227,8 +237,8 @@
227237
"outputs": [],
228238
"source": [
229239
"batch_1 = production_data.loc[:342]\n",
230-
"batch_2 = production_data.loc[342:684]\n",
231-
"batch_3 = production_data.loc[684:]"
240+
"batch_2 = production_data.loc[343:684]\n",
241+
"batch_3 = production_data.loc[686:]"
232242
]
233243
},
234244
{
@@ -276,7 +286,8 @@
276286
" \"Year\"\n",
277287
" ],\n",
278288
" \"timestampColumnName\": \"timestamp\",\n",
279-
" \"inferenceIdColumnName\": \"inference_id\"\n",
289+
" \"inferenceIdColumnName\": \"inference_id\",\n",
290+
" \"predictionsColumnName\": \"predictions\"\n",
280291
"}\n"
281292
]
282293
},

openlayer/__init__.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ def create_project(
132132
warnings.warn(
133133
f"Found an existing project with name '{name}'. Loading it instead."
134134
)
135-
return project
136135
except exceptions.OpenlayerResourceNotFound:
137136
# Validate project
138137
project_config = {
@@ -168,7 +167,7 @@ def create_project(
168167
print(
169168
f"Created your project. Navigate to {project.links['app']} to see it."
170169
)
171-
return project
170+
return project
172171

173172
def load_project(self, name: str) -> Project:
174173
"""Loads an existing project from the Openlayer platform.
@@ -956,7 +955,7 @@ def create_inference_pipeline(
956955
"Created your inference pipeline. Navigate to"
957956
f" {inference_pipeline.links['app']} to see it."
958957
)
959-
return inference_pipeline
958+
return inference_pipeline
960959

961960
def load_inference_pipeline(
962961
self,
@@ -1019,6 +1018,9 @@ def upload_reference_dataset(
10191018
dataset_data = DatasetSchema().load(
10201019
{"task_type": task_type.value, **dataset_config}
10211020
)
1021+
# Add default columns if not present
1022+
if dataset_data.get("columnNames") is None:
1023+
dataset_data["columnNames"] = utils.get_column_names(file_path)
10221024

10231025
with tempfile.TemporaryDirectory() as tmp_dir:
10241026
# Copy relevant files to tmp dir
@@ -1110,6 +1112,8 @@ def publish_batch_data(
11101112
)
11111113

11121114
# Add default columns if not present
1115+
if batch_data.get("columnNames") is None:
1116+
batch_data["columnNames"] = list(batch_df.columns)
11131117
columns_to_add = {"timestampColumnName", "inferenceIdColumnName"}
11141118
for column in columns_to_add:
11151119
if batch_data.get(column) is None:
@@ -1123,6 +1127,12 @@ def publish_batch_data(
11231127
with tempfile.TemporaryDirectory() as tmp_dir:
11241128
# Copy save files to tmp dir
11251129
batch_df.to_csv(f"{tmp_dir}/dataset.csv", index=False)
1130+
utils.write_yaml(batch_data, f"{tmp_dir}/dataset_config.yaml")
1131+
1132+
tar_file_path = os.path.join(tmp_dir, "tarfile")
1133+
with tarfile.open(tar_file_path, mode="w:gz") as tar:
1134+
tar.add(tmp_dir, arcname=os.path.basename("batch_data"))
1135+
11261136
payload = {
11271137
"earliestTimestamp": int(earliest_timestamp),
11281138
"latestTimestamp": int(latest_timestamp),
@@ -1132,8 +1142,8 @@ def publish_batch_data(
11321142

11331143
self.api.upload(
11341144
endpoint=f"inference-pipelines/{inference_pipeline_id}/data",
1135-
file_path=f"{tmp_dir}/dataset.csv",
1136-
object_name="dataset.csv",
1145+
file_path=tar_file_path,
1146+
object_name="tarfile",
11371147
body=payload,
11381148
storage_uri_key="storageUri",
11391149
method="POST",

0 commit comments

Comments (0)