2626 ForecastInputDataError ,
2727)
2828from ads .opctl .operator .cmd import run
29-
29+ import math
3030
3131NUM_ROWS = 1000
3232NUM_SERIES = 10
@@ -172,8 +172,9 @@ def run_yaml(tmpdirname, yaml_i, output_data_path):
172172 run (yaml_i , backend = "operator.local" , debug = True )
173173 subprocess .run (f"ls -a { output_data_path } " , shell = True )
174174
175- test_metrics = pd .read_csv (f"{ tmpdirname } /results/test_metrics.csv" )
176- print (test_metrics )
175+ if 'test_data' in yaml_i ['spec' ]:
176+ test_metrics = pd .read_csv (f"{ tmpdirname } /results/test_metrics.csv" )
177+ print (test_metrics )
177178 train_metrics = pd .read_csv (f"{ tmpdirname } /results/metrics.csv" )
178179 print (train_metrics )
179180
@@ -185,6 +186,7 @@ def populate_yaml(
185186 additional_data_path = None ,
186187 test_data_path = None ,
187188 output_data_path = None ,
189+ preprocessing = None ,
188190):
189191 if historical_data_path is None :
190192 historical_data_path , additional_data_path , test_data_path = setup_rossman ()
@@ -204,7 +206,8 @@ def populate_yaml(
204206 yaml_i ["spec" ]["datetime_column" ]["name" ] = "Date"
205207 yaml_i ["spec" ]["target_category_columns" ] = ["Store" ]
206208 yaml_i ["spec" ]["horizon" ] = HORIZON
207-
209+ if preprocessing :
210+ yaml_i ["spec" ]["preprocessing" ] = preprocessing
208211 if generate_train_metrics :
209212 yaml_i ["spec" ]["generate_metrics" ] = generate_train_metrics
210213 if model == "autots" :
@@ -372,6 +375,7 @@ def test_0_series(operator_setup, model):
372375 historical_data_path = historical_data_path ,
373376 additional_data_path = additional_data_path ,
374377 test_data_path = test_data_path ,
378+ preprocessing = {"enabled" : False }
375379 )
376380 with pytest .raises (DataMismatchError ):
377381 run_yaml (
@@ -429,6 +433,49 @@ def test_invalid_dates(operator_setup, model):
429433 )
430434
431435
def test_disabling_outlier_treatment(operator_setup):
    """Check that the ``outlier_treatment`` preprocessing step can be switched off.

    Two extreme values are planted in a single artificial series and the
    operator is run twice with the "arima" model:

    1. With the default spec, the planted values must not appear in the
       ``input_value`` column of the forecast file.
    2. With ``outlier_treatment`` set to False (imputation still on), both
       planted values must appear in that column.
    """
    tmpdirname = operator_setup
    forecast_csv = f"{tmpdirname}/results/forecast.csv"
    train_len = 100 - HORIZON

    def _reported_input_values():
        # Distinct values of the 'input_value' column written by the latest run.
        return set(pd.read_csv(forecast_csv)['input_value'])

    # Single series: datetime column next to the target column.
    hist_data_0 = pd.concat(
        [HISTORICAL_DATETIME_COL[:train_len], TARGET_COL[:train_len]],
        axis=1,
    )

    # Plant one extreme high and one extreme low value at fixed row labels.
    outliers = [1000, -800]
    for row_label, planted in zip((40, 75), outliers):
        hist_data_0.at[row_label, 'Sales'] = planted

    # Only the historical path is needed; the other two returned paths are unused.
    historical_data_path, _, _ = setup_artificial_data(tmpdirname, hist_data_0)

    yaml_i, output_data_path = populate_yaml(
        tmpdirname=tmpdirname,
        model="arima",
        historical_data_path=historical_data_path,
    )
    spec = yaml_i["spec"]
    spec.pop("target_category_columns")
    spec.pop("additional_data")

    # First pass: default pipeline, where the outliers are expected to be treated.
    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
    input_vals_without_outlier = _reported_input_values()
    assert input_vals_without_outlier.isdisjoint(outliers), \
        "forecast file should not contain any outliers"

    # Second pass: keep imputation on but turn outlier treatment off.
    spec["preprocessing"] = {
        "enabled": True,
        "steps": {"missing_value_imputation": True, "outlier_treatment": False},
    }
    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
    input_vals_with_outlier = _reported_input_values()
    assert input_vals_with_outlier.issuperset(outliers), \
        "forecast file should contain all the outliers"
478+
432479@pytest .mark .parametrize ("model" , MODELS )
433480def test_2_series (operator_setup , model ):
434481 # Test w and w/o add data
@@ -454,12 +501,14 @@ def split_df(df):
454501 historical_data_path , additional_data_path , test_data_path = setup_artificial_data (
455502 tmpdirname , hist_data , add_data , test_data
456503 )
504+ preprocessing_steps = {"missing_value_imputation" : True , "outlier_treatment" : False }
457505 yaml_i , output_data_path = populate_yaml (
458506 tmpdirname = tmpdirname ,
459507 model = model ,
460508 historical_data_path = historical_data_path ,
461509 additional_data_path = additional_data_path ,
462510 test_data_path = test_data_path ,
511+ preprocessing = {"enabled" : True , "steps" : preprocessing_steps }
463512 )
464513 with pytest .raises (DataMismatchError ):
465514 # 4 columns in historical data, but only 1 cat col specified
0 commit comments