Add performGroundTruthMerge attribute to payload

gustavocidornelas · whoseoyster · commit 9fd0d2e6caad · 2023-10-09T20:34:37.000-07:00
diff --git a/openlayer/__init__.py b/openlayer/__init__.py
@@ -1999,13 +1999,16 @@ def publish_batch_data(
         # Get min and max timestamps
         earliest_timestamp = batch_df[batch_data["timestampColumnName"]].min()
         latest_timestamp = batch_df[batch_data["timestampColumnName"]].max()
+        # Check if batch of data contains ground truths
+        contains_ground_truths = self._contains_ground_truths(batch_config=batch_data)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Copy save files to tmp dir
             batch_df.to_csv(f"{tmp_dir}/dataset.csv", index=False)
             payload = {
                 "earliestTimestamp": earliest_timestamp,
                 "latestTimestamp": latest_timestamp,
+                "performGroundTruthMerge": not contains_ground_truths,
                 **batch_data,
             }
 
@@ -2034,6 +2037,14 @@ def _add_default_column(
             df[inference_id_column_name] = [str(uuid.uuid1()) for _ in range(len(df))]
         return config, df
 
+    def _contains_ground_truths(self, batch_config: Dict[str, any]) -> bool:  # NOTE(review): builtin `any`; typing.Any intended?
+        """Return True if batch_config has a non-None ground truth, label, or target column name."""
+        return (
+            batch_config.get("groundTruthColumnName") is not None
+            or batch_config.get("labelColumnName") is not None
+            or batch_config.get("targetColumnName") is not None
+        )
+
     def publish_ground_truths(
         self,
         inference_pipeline_id: str,