1616
1717from ads .common .object_storage_details import ObjectStorageDetails
1818from ads .opctl import logger
19- from ads .opctl .operator .lowcode .anomaly .const import OutputColumns , SupportedMetrics
19+ from ads .opctl .operator .lowcode .anomaly .const import OutputColumns , SupportedMetrics , SUBSAMPLE_THRESHOLD
2020from ads .opctl .operator .lowcode .anomaly .utils import _build_metrics_df , default_signer
2121from ads .opctl .operator .lowcode .common .utils import (
2222 disable_print ,
@@ -79,7 +79,7 @@ def generate_report(self):
7979 anomaly_output , test_data , elapsed_time
8080 )
8181 table_blocks = [
82- rc .DataTable (df , label = col , index = True )
82+ rc .DataTable (df . head ( SUBSAMPLE_THRESHOLD ) if self . spec . subsample_report_data and len ( df ) > SUBSAMPLE_THRESHOLD else df , label = col , index = True )
8383 for col , df in self .datasets .full_data_dict .items ()
8484 ]
8585 data_table = rc .Select (blocks = table_blocks )
@@ -94,20 +94,36 @@ def generate_report(self):
9494 anomaly_col = anomaly_output .get_anomalies_by_cat (category = target )[
9595 OutputColumns .ANOMALY_COL
9696 ]
97+ anomaly_indices = [i for i , index in enumerate (anomaly_col ) if index == 1 ]
98+ downsampled_time_col = time_col
99+ selected_indices = list (range (len (time_col )))
100+ if self .spec .subsample_report_data :
101+ non_anomaly_indices = [i for i in range (len (time_col )) if i not in anomaly_indices ]
102+ # Downsample the non-anomalous points when their count exceeds SUBSAMPLE_THRESHOLD; anomalous points are always kept
103+ if len (non_anomaly_indices ) > SUBSAMPLE_THRESHOLD :
104+ downsampled_non_anomaly_indices = non_anomaly_indices [::len (non_anomaly_indices )// SUBSAMPLE_THRESHOLD ]
105+ selected_indices = anomaly_indices + downsampled_non_anomaly_indices
106+ selected_indices .sort ()
107+ downsampled_time_col = time_col [selected_indices ]
108+
97109 columns = set (df .columns ).difference ({date_column })
98110 for col in columns :
99111 y = df [col ].reset_index (drop = True )
112+
113+ downsampled_y = y [selected_indices ]
114+
100115 fig , ax = plt .subplots (figsize = (8 , 3 ), layout = "constrained" )
101116 ax .grid ()
102- ax .plot (time_col , y , color = "black" )
103- for i , index in enumerate ( anomaly_col ):
104- if index == 1 :
105- ax .scatter (time_col [i ], y [i ], color = "red" , marker = "o" )
117+ ax .plot (downsampled_time_col , downsampled_y , color = "black" )
118+ # Plot anomalies
119+ for i in anomaly_indices :
120+ ax .scatter (time_col [i ], y [i ], color = "red" , marker = "o" )
106121 plt .xlabel (date_column )
107122 plt .ylabel (col )
108123 plt .title (f"`{ col } ` with reference to anomalies" )
109124 figure_blocks .append (rc .Widget (ax ))
110- blocks .append (rc .Group (* figure_blocks , label = target ))
125+
126+ blocks .append (rc .Group (* figure_blocks , label = target ))
111127 plots = rc .Select (blocks )
112128
113129 report_sections = []
0 commit comments