@@ -132,7 +132,6 @@ def create_project(
             warnings.warn(
                 f"Found an existing project with name '{name}'. Loading it instead."
             )
-            return project
         except exceptions.OpenlayerResourceNotFound:
             # Validate project
             project_config = {
@@ -168,7 +167,7 @@ def create_project(
             print(
                 f"Created your project. Navigate to {project.links['app']} to see it."
             )
-            return project
+        return project
 
     def load_project(self, name: str) -> Project:
         """Loads an existing project from the Openlayer platform.
@@ -956,7 +955,7 @@ def create_inference_pipeline(
                 "Created your inference pipeline. Navigate to"
                 f" {inference_pipeline.links['app']} to see it."
             )
-            return inference_pipeline
+        return inference_pipeline
 
     def load_inference_pipeline(
         self,
@@ -1019,6 +1018,9 @@ def upload_reference_dataset(
         dataset_data = DatasetSchema().load(
             {"task_type": task_type.value, **dataset_config}
         )
+        # Add default columns if not present
+        if dataset_data.get("columnNames") is None:
+            dataset_data["columnNames"] = utils.get_column_names(file_path)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Copy relevant files to tmp dir
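The added fallback derives `columnNames` from the reference dataset file when the user's config omits it. The diff delegates to `utils.get_column_names(file_path)`; the sketch below shows one plausible implementation, assuming the file is a CSV with a header row (illustrative only, not the library's actual helper):

```python
import csv
from typing import List

def get_column_names(file_path: str) -> List[str]:
    """Return the column names from a CSV file's header row."""
    with open(file_path, newline="", encoding="utf-8") as csv_file:
        header = next(csv.reader(csv_file))  # first row assumed to be the header
    return header
```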
@@ -1110,6 +1112,8 @@ def publish_batch_data(
         )
 
         # Add default columns if not present
+        if batch_data.get("columnNames") is None:
+            batch_data["columnNames"] = list(batch_df.columns)
         columns_to_add = {"timestampColumnName", "inferenceIdColumnName"}
         for column in columns_to_add:
             if batch_data.get(column) is None:
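For batch publishing the same default comes straight from the in-memory DataFrame, and the loop over `columns_to_add` (context lines above) backfills the timestamp and inference-id columns when the config omits them. A sketch of that defaulting step as a standalone function; the generated default values are assumptions, since the loop body is not shown in the hunk:

```python
import time
import uuid

import pandas as pd

def fill_batch_defaults(batch_data: dict, batch_df: pd.DataFrame) -> None:
    """Backfill config defaults in place from the batch DataFrame."""
    if batch_data.get("columnNames") is None:
        batch_data["columnNames"] = list(batch_df.columns)
    # Assumed defaults for the two columns checked by the loop above:
    if batch_data.get("timestampColumnName") is None:
        batch_df["timestamp"] = int(time.time())  # one upload-time stamp for the batch
        batch_data["timestampColumnName"] = "timestamp"
    if batch_data.get("inferenceIdColumnName") is None:
        batch_df["inference_id"] = [str(uuid.uuid4()) for _ in range(len(batch_df))]
        batch_data["inferenceIdColumnName"] = "inference_id"
```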
@@ -1123,6 +1127,12 @@ def publish_batch_data(
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Copy save files to tmp dir
             batch_df.to_csv(f"{tmp_dir}/dataset.csv", index=False)
+            utils.write_yaml(batch_data, f"{tmp_dir}/dataset_config.yaml")
+
+            tar_file_path = os.path.join(tmp_dir, "tarfile")
+            with tarfile.open(tar_file_path, mode="w:gz") as tar:
+                tar.add(tmp_dir, arcname=os.path.basename("batch_data"))
+
             payload = {
                 "earliestTimestamp": int(earliest_timestamp),
                 "latestTimestamp": int(latest_timestamp),
@@ -1132,8 +1142,8 @@ def publish_batch_data(
 
             self.api.upload(
                 endpoint=f"inference-pipelines/{inference_pipeline_id}/data",
-                file_path=f"{tmp_dir}/dataset.csv",
-                object_name="dataset.csv",
+                file_path=tar_file_path,
+                object_name="tarfile",
                 body=payload,
                 storage_uri_key="storageUri",
                 method="POST",
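With the tarball in place, the final hunk points the upload at the archive rather than the CSV; the endpoint, payload, and `storageUri` handling are unchanged. The exact signature of `self.api.upload` is internal to the Openlayer client, so the sketch below uses `requests` as a hypothetical stand-in to show the shape of the resulting request:

```python
import requests  # hypothetical stand-in for the client's internal api.upload

def upload_batch(base_url: str, pipeline_id: str, tar_file_path: str, payload: dict) -> None:
    """POST the tarball plus its metadata to the inference pipeline data endpoint."""
    with open(tar_file_path, "rb") as archive:
        response = requests.post(
            f"{base_url}/inference-pipelines/{pipeline_id}/data",
            files={"tarfile": archive},  # object_name from the diff
            data=payload,  # earliestTimestamp / latestTimestamp metadata
        )
    response.raise_for_status()
```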