22 | 22 | seconds_to_datetime, |
23 | 23 | datetime_to_seconds, |
24 | 24 | ) |
| 25 | +from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe |
25 | 26 |
26 | 27 | AUTOMLX_N_ALGOS_TUNED = 4 |
27 | 28 | AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error" |
@@ -51,8 +52,13 @@ def set_kwargs(self): |
51 | 52 | ] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True) |
52 | 53 | return model_kwargs_cleaned, time_budget |
53 | 54 |
54 | | - def preprocess(self, data, series_id=None): |
55 | | - return data.set_index(self.spec.datetime_column.name) |
| 55 | + |
| 56 | + def preprocess(self, data, series_id=None): # TODO: re-use self.le for explanations |
| 57 | + _, df_encoded = _label_encode_dataframe( |
| 58 | + data, |
| 59 | + no_encode={self.spec.datetime_column.name, self.original_target_column}, |
| 60 | + ) |
| 61 | + return df_encoded.set_index(self.spec.datetime_column.name) |
56 | 62 |
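
For context, here is a minimal sketch of what the new `preprocess` step does: label-encode categorical columns while leaving the datetime column and the target untouched, then index by the datetime column. The helper `encode_categoricals` below is a hypothetical stand-in for `_label_encode_dataframe` (whose exact implementation lives in `ads.opctl.operator.lowcode.forecast.utils`), not the actual ADS utility.

```python
# Illustrative sketch only: approximates the label-encoding preprocess step above.
# `encode_categoricals` is a hypothetical stand-in for _label_encode_dataframe.
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def encode_categoricals(df: pd.DataFrame, no_encode: set):
    """Label-encode object/categorical columns, skipping those in `no_encode`."""
    encoders = {}
    df_encoded = df.copy()
    for col in df_encoded.columns:
        is_categorical = (
            df_encoded[col].dtype == "object" or str(df_encoded[col].dtype) == "category"
        )
        if col in no_encode or not is_categorical:
            continue
        le = LabelEncoder()
        df_encoded[col] = le.fit_transform(df_encoded[col].astype(str))
        encoders[col] = le
    return encoders, df_encoded


# Usage mirroring the new preprocess(): encode everything except the datetime
# column and the target, then set the datetime column as the index.
df = pd.DataFrame(
    {
        "ds": pd.date_range("2024-01-01", periods=4, freq="D"),
        "store": ["A", "B", "A", "B"],
        "y": [10.0, 12.0, 11.0, 13.0],
    }
)
_, df_encoded = encode_categoricals(df, no_encode={"ds", "y"})
print(df_encoded.set_index("ds"))
```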
57 | 63 | @runtime_dependency( |
58 | 64 | module="automlx", |
@@ -105,7 +111,7 @@ def _build_model(self) -> pd.DataFrame: |
105 | 111 |
106 | 112 | logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}") |
107 | 113 |
108 | | - if self.loaded_models is not None: |
| 114 | + if self.loaded_models is not None and s_id in self.loaded_models: |
109 | 115 | model = self.loaded_models[s_id] |
110 | 116 | else: |
111 | 117 | model = automlx.Pipeline( |
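
The added `s_id in self.loaded_models` check avoids a `KeyError` when the dictionary of previously loaded models has no entry for a given series. A hedged sketch of that reuse pattern, with `build_new_pipeline` standing in for the real `automlx.Pipeline(...)` construction (which is omitted here):

```python
# Sketch of the per-series model reuse guard shown above. `build_new_pipeline`
# is a placeholder for the real automlx.Pipeline(...) construction.
from typing import Callable, Dict, Optional


def get_or_build_model(
    s_id: str,
    loaded_models: Optional[Dict[str, object]],
    build_new_pipeline: Callable[[], object],
) -> object:
    # Reuse a previously loaded model only if one exists for this exact series;
    # otherwise fall back to building a fresh pipeline.
    if loaded_models is not None and s_id in loaded_models:
        return loaded_models[s_id]
    return build_new_pipeline()
```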
@@ -195,82 +201,85 @@ def _generate_report(self): |
195 | 201 | ) |
196 | 202 | selected_models = dict() |
197 | 203 | models = self.models |
198 | | - for i, (s_id, df) in enumerate(self.full_data_dict.items()): |
199 | | - selected_models[s_id] = { |
200 | | - "series_id": s_id, |
201 | | - "selected_model": models[s_id].selected_model_, |
202 | | - "model_params": models[s_id].selected_model_params_, |
203 | | - } |
204 | | - selected_models_df = pd.DataFrame( |
205 | | - selected_models.items(), columns=["series_id", "best_selected_model"] |
206 | | - ) |
207 | | - selected_df = selected_models_df["best_selected_model"].apply(pd.Series) |
208 | | - selected_models_section = dp.Blocks( |
209 | | - "### Best Selected Model", dp.DataTable(selected_df) |
210 | | - ) |
| 204 | + all_sections = [] |
| 205 | + |
| 206 | + if len(self.models) > 0: |
| 207 | + for i, (s_id, m) in enumerate(models.items()): |
| 208 | + selected_models[s_id] = { |
| 209 | + "series_id": s_id, |
| 210 | + "selected_model": m.selected_model_, |
| 211 | + "model_params": m.selected_model_params_, |
| 212 | + } |
| 213 | + selected_models_df = pd.DataFrame( |
| 214 | + selected_models.items(), columns=["series_id", "best_selected_model"] |
| 215 | + ) |
| 216 | + selected_df = selected_models_df["best_selected_model"].apply(pd.Series) |
| 217 | + selected_models_section = dp.Blocks( |
| 218 | + "### Best Selected Model", dp.DataTable(selected_df) |
| 219 | + ) |
211 | 220 |
212 | | - all_sections = [selected_models_text, selected_models_section] |
| 221 | + all_sections = [selected_models_text, selected_models_section] |
213 | 222 |
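
For reference, the `selected_models` dict-of-dicts built above is turned into a per-series table via `pd.DataFrame(...).apply(pd.Series)`. A small standalone illustration of that idiom (the series IDs and model names below are made up):

```python
# Illustration of the selected_models -> DataFrame expansion used above.
import pandas as pd

# Hypothetical per-series selections; keys and model names are illustrative only.
selected_models = {
    "store_1": {"series_id": "store_1", "selected_model": "ModelA", "model_params": {}},
    "store_2": {"series_id": "store_2", "selected_model": "ModelB", "model_params": {"trend": "add"}},
}

# items() yields (series_id, dict) pairs; apply(pd.Series) expands the dict column
# into one column per key ("series_id", "selected_model", "model_params").
selected_models_df = pd.DataFrame(
    selected_models.items(), columns=["series_id", "best_selected_model"]
)
selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
print(selected_df)
```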
214 | 223 | if self.spec.generate_explanations: |
215 | | - # try: |
216 | | - # If the key is present, call the "explain_model" method |
217 | | - self.explain_model() |
218 | | - |
219 | | - # Create a markdown text block for the global explanation section |
220 | | - global_explanation_text = dp.Text( |
221 | | - f"## Global Explanation of Models \n " |
222 | | - "The following tables provide the feature attribution for the global explainability." |
223 | | - ) |
224 | | - |
225 | | - # Convert the global explanation data to a DataFrame |
226 | | - global_explanation_df = pd.DataFrame(self.global_explanation) |
| 224 | + try: |
| 225 | + # If the key is present, call the "explain_model" method |
| 226 | + self.explain_model() |
227 | 227 |
228 | | - self.formatted_global_explanation = ( |
229 | | - global_explanation_df / global_explanation_df.sum(axis=0) * 100 |
230 | | - ) |
231 | | - self.formatted_global_explanation = ( |
232 | | - self.formatted_global_explanation.rename( |
233 | | - {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1 |
| 228 | + # Create a markdown text block for the global explanation section |
| 229 | + global_explanation_text = dp.Text( |
| 230 | + f"## Global Explanation of Models \n " |
| 231 | + "The following tables provide the feature attribution for the global explainability." |
234 | 232 | ) |
235 | | - ) |
236 | 233 |
237 | | - # Create a markdown section for the global explainability |
238 | | - global_explanation_section = dp.Blocks( |
239 | | - "### Global Explainability ", |
240 | | - dp.DataTable(self.formatted_global_explanation), |
241 | | - ) |
| 234 | + # Convert the global explanation data to a DataFrame |
| 235 | + global_explanation_df = pd.DataFrame(self.global_explanation) |
242 | 236 |
243 | | - aggregate_local_explanations = pd.DataFrame() |
244 | | - for s_id, local_ex_df in self.local_explanation.items(): |
245 | | - local_ex_df_copy = local_ex_df.copy() |
246 | | - local_ex_df_copy["Series"] = s_id |
247 | | - aggregate_local_explanations = pd.concat( |
248 | | - [aggregate_local_explanations, local_ex_df_copy], axis=0 |
| 237 | + self.formatted_global_explanation = ( |
| 238 | + global_explanation_df / global_explanation_df.sum(axis=0) * 100 |
| 239 | + ) |
| 240 | + self.formatted_global_explanation = ( |
| 241 | + self.formatted_global_explanation.rename( |
| 242 | + {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1 |
| 243 | + ) |
249 | 244 | ) |
250 | | - self.formatted_local_explanation = aggregate_local_explanations |
251 | 245 |
252 | | - local_explanation_text = dp.Text(f"## Local Explanation of Models \n ") |
253 | | - blocks = [ |
254 | | - dp.DataTable( |
255 | | - local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100, |
256 | | - label=s_id, |
| 246 | + # Create a markdown section for the global explainability |
| 247 | + global_explanation_section = dp.Blocks( |
| 248 | + "### Global Explainability ", |
| 249 | + dp.DataTable(self.formatted_global_explanation), |
257 | 250 | ) |
258 | | - for s_id, local_ex_df in self.local_explanation.items() |
259 | | - ] |
260 | | - local_explanation_section = ( |
261 | | - dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0] |
262 | | - ) |
263 | 251 |
264 | | - # Append the global explanation text and section to the "all_sections" list |
265 | | - all_sections = all_sections + [ |
266 | | - global_explanation_text, |
267 | | - global_explanation_section, |
268 | | - local_explanation_text, |
269 | | - local_explanation_section, |
270 | | - ] |
271 | | - # except Exception as e: |
272 | | - # logger.warn(f"Failed to generate Explanations with error: {e}.") |
273 | | - # logger.debug(f"Full Traceback: {traceback.format_exc()}") |
| 252 | + aggregate_local_explanations = pd.DataFrame() |
| 253 | + for s_id, local_ex_df in self.local_explanation.items(): |
| 254 | + local_ex_df_copy = local_ex_df.copy() |
| 255 | + local_ex_df_copy["Series"] = s_id |
| 256 | + aggregate_local_explanations = pd.concat( |
| 257 | + [aggregate_local_explanations, local_ex_df_copy], axis=0 |
| 258 | + ) |
| 259 | + self.formatted_local_explanation = aggregate_local_explanations |
| 260 | + |
| 261 | + local_explanation_text = dp.Text(f"## Local Explanation of Models \n ") |
| 262 | + blocks = [ |
| 263 | + dp.DataTable( |
| 264 | + local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100, |
| 265 | + label=s_id, |
| 266 | + ) |
| 267 | + for s_id, local_ex_df in self.local_explanation.items() |
| 268 | + ] |
| 269 | + local_explanation_section = ( |
| 270 | + dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0] |
| 271 | + ) |
| 272 | + |
| 273 | + # Append the global explanation text and section to the "all_sections" list |
| 274 | + all_sections = all_sections + [ |
| 275 | + global_explanation_text, |
| 276 | + global_explanation_section, |
| 277 | + local_explanation_text, |
| 278 | + local_explanation_section, |
| 279 | + ] |
| 280 | + except Exception as e: |
| 281 | + logger.warn(f"Failed to generate Explanations with error: {e}.") |
| 282 | + logger.debug(f"Full Traceback: {traceback.format_exc()}") |
274 | 283 |
275 | 284 | model_description = dp.Text( |
276 | 285 | "The AutoMLx model automatically preprocesses, selects and engineers " |
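
The report code above rescales raw feature attributions into percentages: global explanations are normalized per column (each feature's share of that series' total attribution), while local explanations are normalized per row against the sum of absolute attributions. A small standalone illustration of both operations, using made-up attribution values:

```python
# Standalone illustration of the two normalizations used in _generate_report above.
import pandas as pd

# Pretend global explanation: one column of raw attributions per series.
global_explanation_df = pd.DataFrame(
    {"series_A": [2.0, 6.0, 2.0], "series_B": [1.0, 3.0, 4.0]},
    index=["feature_1", "feature_2", "feature_3"],
)
# Column-wise percentages: each feature's share of that series' total attribution.
formatted_global = global_explanation_df / global_explanation_df.sum(axis=0) * 100
print(formatted_global)  # series_A -> 20, 60, 20

# Pretend local explanation: one row of raw attributions per forecasted timestamp.
local_ex_df = pd.DataFrame(
    {"feature_1": [1.0, -2.0], "feature_2": [3.0, 2.0]},
    index=pd.date_range("2024-01-01", periods=2, freq="D"),
)
# Row-wise percentages against the sum of absolute attributions: signs are kept,
# and each row's absolute values sum to 100.
formatted_local = local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100
print(formatted_local)  # first row -> 25, 75; second row -> -50, 50
```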