@@ -1999,13 +1999,16 @@ def publish_batch_data(
19991999 # Get min and max timestamps
20002000 earliest_timestamp = batch_df [batch_data ["timestampColumnName" ]].min ()
20012001 latest_timestamp = batch_df [batch_data ["timestampColumnName" ]].max ()
2002+ # Check if batch of data contains ground truths
2003+ contains_ground_truths = self ._contains_ground_truths (batch_config = batch_data )
20022004
20032005 with tempfile .TemporaryDirectory () as tmp_dir :
20042006 # Copy save files to tmp dir
20052007 batch_df .to_csv (f"{ tmp_dir } /dataset.csv" , index = False )
20062008 payload = {
20072009 "earliestTimestamp" : earliest_timestamp ,
20082010 "latestTimestamp" : latest_timestamp ,
2011+ "performGroundTruthMerge" : not contains_ground_truths ,
20092012 ** batch_data ,
20102013 }
20112014
@@ -2034,6 +2037,14 @@ def _add_default_column(
20342037 df [inference_id_column_name ] = [str (uuid .uuid1 ()) for _ in range (len (df ))]
20352038 return config , df
20362039
def _contains_ground_truths(self, batch_config: Dict[str, any]) -> bool:
    """Report whether the batch configuration names a ground-truth column.

    Returns ``True`` when at least one of the keys
    ``groundTruthColumnName``, ``labelColumnName`` or ``targetColumnName``
    is present in ``batch_config`` with a value other than ``None``;
    otherwise returns ``False``.
    """
    # NOTE(review): ``any`` in the annotation is the builtin function;
    # ``typing.Any`` was presumably intended -- confirm and fix together
    # with the file's imports.
    ground_truth_keys = (
        "groundTruthColumnName",
        "labelColumnName",
        "targetColumnName",
    )
    # Keys are probed in the same order as before; evaluation stops at the
    # first one that maps to a non-None value.
    return any(batch_config.get(key) is not None for key in ground_truth_keys)
2047+
20372048 def publish_ground_truths (
20382049 self ,
20392050 inference_pipeline_id : str ,
0 commit comments