From 034131946e44c6be78463fbe768f3cb87fb4b71e Mon Sep 17 00:00:00 2001 From: Anirudh Sengar Date: Sun, 16 Nov 2025 08:27:59 +0400 Subject: [PATCH 1/6] extend sktime ForecastingOptCV with broadcasting options and returned parameters Signed-off-by: Anirudh Sengar --- .../integrations/sktime/_forecasting.py | 188 +++++++++++++++++- 1 file changed, 187 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 82f56b74..85927262 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -151,6 +151,24 @@ class ForecastingOptCV(_DelegatedForecaster): - "logger_name": str, default="ray"; name of the logger to use. - "mute_warnings": bool, default=False; if True, suppresses warnings + tune_by_instance : bool, optional (default=False) + Whether to tune parameter by each time series instance separately, + in case of Panel or Hierarchical data passed to the tuning estimator. + Only applies if time series passed are Panel or Hierarchical. + If True, clones of the forecaster will be fit to each instance separately, + and are available in fields of the ``forecasters_`` attribute. + Has the same effect as applying ForecastByLevel wrapper to self. + If False, the same best parameter is selected for all instances. + + tune_by_variable : bool, optional (default=False) + Whether to tune parameter by each time series variable separately, + in case of multivariate data passed to the tuning estimator. + Only applies if time series passed are strictly multivariate. + If True, clones of the forecaster will be fit to each variable separately, + and are available in fields of the ``forecasters_`` attribute. + Has the same effect as applying ColumnEnsembleForecaster wrapper to self. + If False, the same best parameter is selected for all variables. + Example ------- Any available tuning engine from hyperactive can be used, for example: @@ -215,6 +233,8 @@ def __init__( cv_X=None, backend=None, backend_params=None, + tune_by_instance=False, + tune_by_variable=False, ): self.forecaster = forecaster self.optimizer = optimizer @@ -227,6 +247,8 @@ def __init__( self.cv_X = cv_X self.backend = backend self.backend_params = backend_params + self.tune_by_instance = tune_by_instance + self.tune_by_variable = tune_by_variable super().__init__() def _fit(self, y, X, fh): @@ -245,12 +267,22 @@ def _fit(self, y, X, fh): ------- self : returns an instance of self. """ + import time + from sktime.utils.validation.forecasting import check_scoring + # Handle broadcasting options + if self.tune_by_instance or self.tune_by_variable: + return self._fit_with_broadcasting(y, X, fh) + forecaster = self.forecaster.clone() scoring = check_scoring(self.scoring, obj=self) # scoring_name = f"test_{scoring.name}" + self.scorer_ = scoring + + # Count number of CV splits + self.n_splits_ = self.cv.get_n_splits(y) experiment = SktimeForecastingExperiment( forecaster=forecaster, @@ -272,9 +304,31 @@ def _fit(self, y, X, fh): self.best_params_ = best_params self.best_forecaster_ = forecaster.set_params(**best_params) - # Refit model with best parameters. + # Store cv_results from optimizer if available + if hasattr(optimizer, "results"): + self.cv_results_ = optimizer.results + else: + # Create a basic cv_results_ dict + self.cv_results_ = {"best_params": best_params} + + # Store best_index_ and best_score_ if available from optimizer + if hasattr(optimizer, "best_score"): + self.best_score_ = optimizer.best_score + else: + # Calculate best score by evaluating best params + best_score, _ = experiment.score(best_params) + self.best_score_ = best_score + + self.best_index_ = 0 # For single best result + + # Refit model with best parameters and track time. if self.refit: + start_time = time.time() self.best_forecaster_.fit(y=y, X=X, fh=fh) + end_time = time.time() + self.refit_time_ = end_time - start_time + else: + self.refit_time_ = 0.0 return self @@ -311,6 +365,138 @@ def _predict(self, fh, X): ) return super()._predict(fh=fh, X=X) + def _fit_with_broadcasting(self, y, X, fh): + """Fit with broadcasting options (tune_by_instance or tune_by_variable). + + Parameters + ---------- + y : pd.Series or pd.DataFrame + Target time series to which to fit the forecaster. + X : pd.DataFrame, optional (default=None) + Exogenous variables + fh : int, list or np.array, optional (default=None) + The forecasters horizon with the steps ahead to to predict. + + Returns + ------- + self : returns an instance of self. + """ + import pandas as pd + from sktime.utils.validation.forecasting import check_scoring + + scoring = check_scoring(self.scoring, obj=self) + self.scorer_ = scoring + self.n_splits_ = self.cv.get_n_splits(y) + + # Determine if we need to broadcast + is_panel = "MultiIndex" in str(type(getattr(y, "index", None))) + is_multivariate = isinstance(y, pd.DataFrame) and len(y.columns) > 1 + + forecasters_list = [] + + # Handle tune_by_instance for Panel/Hierarchical data + if self.tune_by_instance and is_panel: + # Get unique instances + if hasattr(y.index, "levels"): + instances = y.index.get_level_values(0).unique() + else: + instances = [0] # Single instance fallback + + for instance in instances: + # Extract instance data + if hasattr(y.index, "levels"): + y_instance = y.loc[instance] + X_instance = X.loc[instance] if X is not None else None + else: + y_instance = y + X_instance = X + + # Fit for this instance + tuner = type(self)( + forecaster=self.forecaster.clone(), + optimizer=self.optimizer.clone(), + cv=self.cv, + strategy=self.strategy, + update_behaviour=self.update_behaviour, + scoring=self.scoring, + refit=self.refit, + error_score=self.error_score, + cv_X=self.cv_X, + backend=self.backend, + backend_params=self.backend_params, + tune_by_instance=False, + tune_by_variable=self.tune_by_variable, + ) + tuner.fit(y_instance, X=X_instance, fh=fh) + + forecasters_list.append( + { + "instance": instance, + "forecaster": tuner.best_forecaster_, + "best_params": tuner.best_params_, + "best_score": tuner.best_score_, + } + ) + + # Store as DataFrame + self.forecasters_ = pd.DataFrame(forecasters_list) + # Set a representative best_forecaster_ + self.best_forecaster_ = forecasters_list[0]["forecaster"] + self.best_params_ = forecasters_list[0]["best_params"] + self.best_score_ = forecasters_list[0]["best_score"] + + # Handle tune_by_variable for multivariate data + elif self.tune_by_variable and is_multivariate: + variables = y.columns + + for variable in variables: + # Extract variable data + y_var = y[[variable]] + X_var = X if X is not None else None + + # Fit for this variable + tuner = type(self)( + forecaster=self.forecaster.clone(), + optimizer=self.optimizer.clone(), + cv=self.cv, + strategy=self.strategy, + update_behaviour=self.update_behaviour, + scoring=self.scoring, + refit=self.refit, + error_score=self.error_score, + cv_X=self.cv_X, + backend=self.backend, + backend_params=self.backend_params, + tune_by_instance=False, + tune_by_variable=False, + ) + tuner.fit(y_var, X=X_var, fh=fh) + + forecasters_list.append( + { + "variable": variable, + "forecaster": tuner.best_forecaster_, + "best_params": tuner.best_params_, + "best_score": tuner.best_score_, + } + ) + + # Store as DataFrame + self.forecasters_ = pd.DataFrame(forecasters_list) + # Set a representative best_forecaster_ + self.best_forecaster_ = forecasters_list[0]["forecaster"] + self.best_params_ = forecasters_list[0]["best_params"] + self.best_score_ = forecasters_list[0]["best_score"] + else: + # If broadcasting was requested but not applicable, fall back to regular fit + return self._fit(y, X, fh) + + self.best_index_ = 0 + self.cv_results_ = {"forecasters": self.forecasters_} + self.refit_time_ = 0.0 + + return self + def _update(self, y, X=None, update_params=True): """Update time series to incremental training data. From 74ed4a060d495ff36b67587db19be011ce59999d Mon Sep 17 00:00:00 2001 From: Anirudh Sengar Date: Sun, 16 Nov 2025 13:24:08 +0400 Subject: [PATCH 2/6] Improve sktime ForecastingOptCV broadcasting Signed-off-by: Anirudh Sengar --- .../integrations/sktime/_forecasting.py | 62 +++++++++++++------ .../sktime/tests/test_sktime_estimators.py | 37 ++++++++++- 2 files changed, 80 insertions(+), 19 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 85927262..c6d01aa9 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -267,18 +267,23 @@ def _fit(self, y, X, fh): ------- self : returns an instance of self. """ + # Handle broadcasting options when requested and applicable + if self.tune_by_instance or self.tune_by_variable: + broadcasted = self._fit_with_broadcasting(y, X, fh) + if broadcasted: + return self + + return self._fit_single(y, X, fh) + + def _fit_single(self, y, X, fh): + """Run the core fit logic without broadcasting shortcuts.""" import time from sktime.utils.validation.forecasting import check_scoring - # Handle broadcasting options - if self.tune_by_instance or self.tune_by_variable: - return self._fit_with_broadcasting(y, X, fh) - forecaster = self.forecaster.clone() scoring = check_scoring(self.scoring, obj=self) - # scoring_name = f"test_{scoring.name}" self.scorer_ = scoring # Count number of CV splits @@ -379,7 +384,8 @@ def _fit_with_broadcasting(self, y, X, fh): Returns ------- - self : returns an instance of self. + bool + True if broadcasting was performed, False otherwise. """ import pandas as pd from sktime.utils.validation.forecasting import check_scoring @@ -393,9 +399,12 @@ def _fit_with_broadcasting(self, y, X, fh): is_multivariate = isinstance(y, pd.DataFrame) and len(y.columns) > 1 forecasters_list = [] + refit_times = [] + broadcast_handled = False # Handle tune_by_instance for Panel/Hierarchical data if self.tune_by_instance and is_panel: + broadcast_handled = True # Get unique instances if hasattr(y.index, "levels"): instances = y.index.get_level_values(0).unique() @@ -435,18 +444,17 @@ def _fit_with_broadcasting(self, y, X, fh): "forecaster": tuner.best_forecaster_, "best_params": tuner.best_params_, "best_score": tuner.best_score_, + "refit_time": getattr(tuner, "refit_time_", 0.0), } ) + refit_times.append(getattr(tuner, "refit_time_", 0.0)) # Store as DataFrame self.forecasters_ = pd.DataFrame(forecasters_list) - # Set a representative best_forecaster_ - self.best_forecaster_ = forecasters_list[0]["forecaster"] - self.best_params_ = forecasters_list[0]["best_params"] - self.best_score_ = forecasters_list[0]["best_score"] # Handle tune_by_variable for multivariate data elif self.tune_by_variable and is_multivariate: + broadcast_handled = True variables = y.columns for variable in variables: @@ -478,24 +486,42 @@ def _fit_with_broadcasting(self, y, X, fh): "forecaster": tuner.best_forecaster_, "best_params": tuner.best_params_, "best_score": tuner.best_score_, + "refit_time": getattr(tuner, "refit_time_", 0.0), } ) + refit_times.append(getattr(tuner, "refit_time_", 0.0)) # Store as DataFrame self.forecasters_ = pd.DataFrame(forecasters_list) - # Set a representative best_forecaster_ - self.best_forecaster_ = forecasters_list[0]["forecaster"] - self.best_params_ = forecasters_list[0]["best_params"] - self.best_score_ = forecasters_list[0]["best_score"] else: # If broadcasting was requested but not applicable, fall back to regular fit - return self._fit(y, X, fh) + return False + + if not forecasters_list: + raise RuntimeError( + "Broadcasting was requested but no forecasters were fitted." + ) + + # Determine best forecaster based on available scores + scores = [entry.get("best_score") for entry in forecasters_list] + score_values = [np.inf if score is None else score for score in scores] + best_index = int(np.argmin(score_values)) + best_entry = forecasters_list[best_index] + + self.best_forecaster_ = best_entry["forecaster"] + self.best_params_ = best_entry["best_params"] + self.best_score_ = best_entry.get("best_score") + self.best_index_ = best_index - self.best_index_ = 0 self.cv_results_ = {"forecasters": self.forecasters_} - self.refit_time_ = 0.0 - return self + # Aggregate refit times from each cloned tuner + if self.refit: + self.refit_time_ = float(np.sum(refit_times)) + else: + self.refit_time_ = 0.0 + + return broadcast_handled def _update(self, y, X=None, update_params=True): """Update time series to incremental training data. diff --git a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py index eeed78d3..0001fc9d 100644 --- a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py +++ b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py @@ -4,13 +4,19 @@ import pytest from skbase.utils.dependencies import _check_soft_dependencies -if _check_soft_dependencies("sktime", severity="none"): +SKTIME_AVAILABLE = _check_soft_dependencies("sktime", severity="none") + +if SKTIME_AVAILABLE: from hyperactive.integrations.sktime import ForecastingOptCV, TSCOptCV EST_TO_TEST = [ForecastingOptCV, TSCOptCV] else: EST_TO_TEST = [] +pytestmark = pytest.mark.skipif( + not SKTIME_AVAILABLE, reason="sktime soft dependency not available" +) + @pytest.mark.parametrize("estimator", EST_TO_TEST) def test_sktime_estimator(estimator): @@ -20,3 +26,32 @@ def test_sktime_estimator(estimator): check_estimator(estimator, raise_exceptions=True) # The above line collects all API conformance tests in sktime and runs them. # It will raise an error if the estimator is not API conformant. + + +def test_tune_by_instance_fallback_when_not_panel(): + """Ensure tune_by_instance gracefully falls back for univariate data.""" + import numpy as np + import pandas as pd + from sktime.forecasting.naive import NaiveForecaster + from sktime.split import SingleWindowSplitter + + from hyperactive.opt.gridsearch import GridSearchSk + + y = pd.Series(np.arange(24, dtype=float)) + fh = [1] + splitter = SingleWindowSplitter(fh=fh, window_length=12) + optimizer = GridSearchSk(param_grid={"window_length": [2, 4]}) + + tuner = ForecastingOptCV( + forecaster=NaiveForecaster(strategy="last"), + optimizer=optimizer, + cv=splitter, + tune_by_instance=True, + ) + + tuned = tuner.fit(y, fh=fh) + + assert isinstance(tuned.best_params_, dict) + assert tuned.best_index_ == 0 + assert not hasattr(tuned, "forecasters_") + assert tuned.refit_time_ >= 0.0 From 4fb6fa299457029cec80d94cab728dba15039325 Mon Sep 17 00:00:00 2001 From: Anirudh Sengar Date: Mon, 24 Nov 2025 11:06:29 +0400 Subject: [PATCH 3/6] Revert re-implementation Signed-off-by: Anirudh Sengar --- .../integrations/sktime/_forecasting.py | 216 +----------------- .../sktime/tests/test_sktime_estimators.py | 37 +-- 2 files changed, 3 insertions(+), 250 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index c6d01aa9..82f56b74 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -151,24 +151,6 @@ class ForecastingOptCV(_DelegatedForecaster): - "logger_name": str, default="ray"; name of the logger to use. - "mute_warnings": bool, default=False; if True, suppresses warnings - tune_by_instance : bool, optional (default=False) - Whether to tune parameter by each time series instance separately, - in case of Panel or Hierarchical data passed to the tuning estimator. - Only applies if time series passed are Panel or Hierarchical. - If True, clones of the forecaster will be fit to each instance separately, - and are available in fields of the ``forecasters_`` attribute. - Has the same effect as applying ForecastByLevel wrapper to self. - If False, the same best parameter is selected for all instances. - - tune_by_variable : bool, optional (default=False) - Whether to tune parameter by each time series variable separately, - in case of multivariate data passed to the tuning estimator. - Only applies if time series passed are strictly multivariate. - If True, clones of the forecaster will be fit to each variable separately, - and are available in fields of the ``forecasters_`` attribute. - Has the same effect as applying ColumnEnsembleForecaster wrapper to self. - If False, the same best parameter is selected for all variables. - Example ------- Any available tuning engine from hyperactive can be used, for example: @@ -233,8 +215,6 @@ def __init__( cv_X=None, backend=None, backend_params=None, - tune_by_instance=False, - tune_by_variable=False, ): self.forecaster = forecaster self.optimizer = optimizer @@ -247,8 +227,6 @@ def __init__( self.cv_X = cv_X self.backend = backend self.backend_params = backend_params - self.tune_by_instance = tune_by_instance - self.tune_by_variable = tune_by_variable super().__init__() def _fit(self, y, X, fh): @@ -267,27 +245,12 @@ def _fit(self, y, X, fh): ------- self : returns an instance of self. """ - # Handle broadcasting options when requested and applicable - if self.tune_by_instance or self.tune_by_variable: - broadcasted = self._fit_with_broadcasting(y, X, fh) - if broadcasted: - return self - - return self._fit_single(y, X, fh) - - def _fit_single(self, y, X, fh): - """Run the core fit logic without broadcasting shortcuts.""" - import time - from sktime.utils.validation.forecasting import check_scoring forecaster = self.forecaster.clone() scoring = check_scoring(self.scoring, obj=self) - self.scorer_ = scoring - - # Count number of CV splits - self.n_splits_ = self.cv.get_n_splits(y) + # scoring_name = f"test_{scoring.name}" experiment = SktimeForecastingExperiment( forecaster=forecaster, @@ -309,31 +272,9 @@ def _fit_single(self, y, X, fh): self.best_params_ = best_params self.best_forecaster_ = forecaster.set_params(**best_params) - # Store cv_results from optimizer if available - if hasattr(optimizer, "results"): - self.cv_results_ = optimizer.results - else: - # Create a basic cv_results_ dict - self.cv_results_ = {"best_params": best_params} - - # Store best_index_ and best_score_ if available from optimizer - if hasattr(optimizer, "best_score"): - self.best_score_ = optimizer.best_score - else: - # Calculate best score by evaluating best params - best_score, _ = experiment.score(best_params) - self.best_score_ = best_score - - self.best_index_ = 0 # For single best result - - # Refit model with best parameters and track time. + # Refit model with best parameters. if self.refit: - start_time = time.time() self.best_forecaster_.fit(y=y, X=X, fh=fh) - end_time = time.time() - self.refit_time_ = end_time - start_time - else: - self.refit_time_ = 0.0 return self @@ -370,159 +311,6 @@ def _predict(self, fh, X): ) return super()._predict(fh=fh, X=X) - def _fit_with_broadcasting(self, y, X, fh): - """Fit with broadcasting options (tune_by_instance or tune_by_variable). - - Parameters - ---------- - y : pd.Series or pd.DataFrame - Target time series to which to fit the forecaster. - X : pd.DataFrame, optional (default=None) - Exogenous variables - fh : int, list or np.array, optional (default=None) - The forecasters horizon with the steps ahead to to predict. - - Returns - ------- - bool - True if broadcasting was performed, False otherwise. - """ - import pandas as pd - from sktime.utils.validation.forecasting import check_scoring - - scoring = check_scoring(self.scoring, obj=self) - self.scorer_ = scoring - self.n_splits_ = self.cv.get_n_splits(y) - - # Determine if we need to broadcast - is_panel = "MultiIndex" in str(type(getattr(y, "index", None))) - is_multivariate = isinstance(y, pd.DataFrame) and len(y.columns) > 1 - - forecasters_list = [] - refit_times = [] - broadcast_handled = False - - # Handle tune_by_instance for Panel/Hierarchical data - if self.tune_by_instance and is_panel: - broadcast_handled = True - # Get unique instances - if hasattr(y.index, "levels"): - instances = y.index.get_level_values(0).unique() - else: - instances = [0] # Single instance fallback - - for instance in instances: - # Extract instance data - if hasattr(y.index, "levels"): - y_instance = y.loc[instance] - X_instance = X.loc[instance] if X is not None else None - else: - y_instance = y - X_instance = X - - # Fit for this instance - tuner = type(self)( - forecaster=self.forecaster.clone(), - optimizer=self.optimizer.clone(), - cv=self.cv, - strategy=self.strategy, - update_behaviour=self.update_behaviour, - scoring=self.scoring, - refit=self.refit, - error_score=self.error_score, - cv_X=self.cv_X, - backend=self.backend, - backend_params=self.backend_params, - tune_by_instance=False, - tune_by_variable=self.tune_by_variable, - ) - tuner.fit(y_instance, X=X_instance, fh=fh) - - forecasters_list.append( - { - "instance": instance, - "forecaster": tuner.best_forecaster_, - "best_params": tuner.best_params_, - "best_score": tuner.best_score_, - "refit_time": getattr(tuner, "refit_time_", 0.0), - } - ) - refit_times.append(getattr(tuner, "refit_time_", 0.0)) - - # Store as DataFrame - self.forecasters_ = pd.DataFrame(forecasters_list) - - # Handle tune_by_variable for multivariate data - elif self.tune_by_variable and is_multivariate: - broadcast_handled = True - variables = y.columns - - for variable in variables: - # Extract variable data - y_var = y[[variable]] - X_var = X if X is not None else None - - # Fit for this variable - tuner = type(self)( - forecaster=self.forecaster.clone(), - optimizer=self.optimizer.clone(), - cv=self.cv, - strategy=self.strategy, - update_behaviour=self.update_behaviour, - scoring=self.scoring, - refit=self.refit, - error_score=self.error_score, - cv_X=self.cv_X, - backend=self.backend, - backend_params=self.backend_params, - tune_by_instance=False, - tune_by_variable=False, - ) - tuner.fit(y_var, X=X_var, fh=fh) - - forecasters_list.append( - { - "variable": variable, - "forecaster": tuner.best_forecaster_, - "best_params": tuner.best_params_, - "best_score": tuner.best_score_, - "refit_time": getattr(tuner, "refit_time_", 0.0), - } - ) - refit_times.append(getattr(tuner, "refit_time_", 0.0)) - - # Store as DataFrame - self.forecasters_ = pd.DataFrame(forecasters_list) - else: - # If broadcasting was requested but not applicable, fall back to regular fit - return False - - if not forecasters_list: - raise RuntimeError( - "Broadcasting was requested but no forecasters were fitted." - ) - - # Determine best forecaster based on available scores - scores = [entry.get("best_score") for entry in forecasters_list] - score_values = [np.inf if score is None else score for score in scores] - best_index = int(np.argmin(score_values)) - best_entry = forecasters_list[best_index] - - self.best_forecaster_ = best_entry["forecaster"] - self.best_params_ = best_entry["best_params"] - self.best_score_ = best_entry.get("best_score") - self.best_index_ = best_index - - self.cv_results_ = {"forecasters": self.forecasters_} - - # Aggregate refit times from each cloned tuner - if self.refit: - self.refit_time_ = float(np.sum(refit_times)) - else: - self.refit_time_ = 0.0 - - return broadcast_handled - def _update(self, y, X=None, update_params=True): """Update time series to incremental training data. diff --git a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py index 0001fc9d..eeed78d3 100644 --- a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py +++ b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py @@ -4,19 +4,13 @@ import pytest from skbase.utils.dependencies import _check_soft_dependencies -SKTIME_AVAILABLE = _check_soft_dependencies("sktime", severity="none") - -if SKTIME_AVAILABLE: +if _check_soft_dependencies("sktime", severity="none"): from hyperactive.integrations.sktime import ForecastingOptCV, TSCOptCV EST_TO_TEST = [ForecastingOptCV, TSCOptCV] else: EST_TO_TEST = [] -pytestmark = pytest.mark.skipif( - not SKTIME_AVAILABLE, reason="sktime soft dependency not available" -) - @pytest.mark.parametrize("estimator", EST_TO_TEST) def test_sktime_estimator(estimator): @@ -26,32 +20,3 @@ def test_sktime_estimator(estimator): check_estimator(estimator, raise_exceptions=True) # The above line collects all API conformance tests in sktime and runs them. # It will raise an error if the estimator is not API conformant. - - -def test_tune_by_instance_fallback_when_not_panel(): - """Ensure tune_by_instance gracefully falls back for univariate data.""" - import numpy as np - import pandas as pd - from sktime.forecasting.naive import NaiveForecaster - from sktime.split import SingleWindowSplitter - - from hyperactive.opt.gridsearch import GridSearchSk - - y = pd.Series(np.arange(24, dtype=float)) - fh = [1] - splitter = SingleWindowSplitter(fh=fh, window_length=12) - optimizer = GridSearchSk(param_grid={"window_length": [2, 4]}) - - tuner = ForecastingOptCV( - forecaster=NaiveForecaster(strategy="last"), - optimizer=optimizer, - cv=splitter, - tune_by_instance=True, - ) - - tuned = tuner.fit(y, fh=fh) - - assert isinstance(tuned.best_params_, dict) - assert tuned.best_index_ == 0 - assert not hasattr(tuned, "forecasters_") - assert tuned.refit_time_ >= 0.0 From ea9d52af9bb8532e9681c3e2613d7abe3269bdfd Mon Sep 17 00:00:00 2001 From: Anirudh Sengar Date: Mon, 24 Nov 2025 11:59:27 +0400 Subject: [PATCH 4/6] Use existing implementation Signed-off-by: Anirudh Sengar --- examples/integrations/README.md | 8 ++ .../integrations/sktime/_forecasting.py | 82 ++++++++++++++++++- .../sktime/tests/test_sktime_estimators.py | 51 ++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/examples/integrations/README.md b/examples/integrations/README.md index d049161b..f3fc11dd 100644 --- a/examples/integrations/README.md +++ b/examples/integrations/README.md @@ -99,6 +99,14 @@ y_pred = tuned_naive.predict() # 4. obtaining best parameters and best forecaster best_params = tuned_naive.best_params_ best_forecaster = tuned_naive.best_forecaster_ + +# Broadcasting options & diagnostics + +``ForecastingOptCV`` mirrors ``ForecastingGridSearchCV`` by exposing +``tune_by_instance`` and ``tune_by_variable`` flags for automatic sktime +broadcasting over panel or multivariate data. After ``fit`` completes you also +gain access to ``best_score_``, ``cv_results_`` (per-fold backtesting results), +``n_splits_``, ``scorer_``, and ``refit_time_`` for downstream inspection. ``` ## Integration with sktime - time series classification diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 82f56b74..7565177e 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -1,13 +1,20 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) +import time + import numpy as np from skbase.utils.dependencies import _check_soft_dependencies -if _check_soft_dependencies("sktime", severity="none"): +_HAS_SKTIME = _check_soft_dependencies("sktime", severity="none") + +if _HAS_SKTIME: + from sktime.datatypes import mtype_to_scitype from sktime.forecasting.base._delegate import _DelegatedForecaster else: from skbase.base import BaseEstimator as _DelegatedForecaster + mtype_to_scitype = None + from hyperactive.experiment.integrations.sktime_forecasting import ( SktimeForecastingExperiment, ) @@ -151,6 +158,15 @@ class ForecastingOptCV(_DelegatedForecaster): - "logger_name": str, default="ray"; name of the logger to use. - "mute_warnings": bool, default=False; if True, suppresses warnings + tune_by_instance : bool, optional (default=False) + Whether to tune parameters separately for each time series instance when + panel or hierarchical data is passed. Mirrors ``ForecastingGridSearchCV`` + semantics by delegating broadcasting to sktime's vectorization logic. + tune_by_variable : bool, optional (default=False) + Whether to tune parameters per variable for strictly multivariate series. + When enabled, only univariate targets are accepted and internal + broadcasting is handled by sktime. + Example ------- Any available tuning engine from hyperactive can be used, for example: @@ -215,6 +231,8 @@ def __init__( cv_X=None, backend=None, backend_params=None, + tune_by_instance=False, + tune_by_variable=False, ): self.forecaster = forecaster self.optimizer = optimizer @@ -227,8 +245,20 @@ def __init__( self.cv_X = cv_X self.backend = backend self.backend_params = backend_params + self.tune_by_instance = tune_by_instance + self.tune_by_variable = tune_by_variable super().__init__() + if _HAS_SKTIME: + self._set_delegated_tags(delegate=self.forecaster) + tags_to_clone = ["y_inner_mtype", "X_inner_mtype"] + self.clone_tags(self.forecaster, tags_to_clone) + self._extend_to_all_scitypes("y_inner_mtype") + self._extend_to_all_scitypes("X_inner_mtype") + + if self.tune_by_variable: + self.set_tags(**{"scitype:y": "univariate"}) + def _fit(self, y, X, fh): """Fit to training data. @@ -250,6 +280,16 @@ def _fit(self, y, X, fh): forecaster = self.forecaster.clone() scoring = check_scoring(self.scoring, obj=self) + self.scorer_ = scoring + get_n_splits = getattr(self.cv, "get_n_splits", None) + if callable(get_n_splits): + try: + self.n_splits_ = get_n_splits(y) + except TypeError: + # fallback for splitters that expect no args + self.n_splits_ = get_n_splits() + else: + self.n_splits_ = None # scoring_name = f"test_{scoring.name}" experiment = SktimeForecastingExperiment( @@ -270,14 +310,54 @@ def _fit(self, y, X, fh): best_params = optimizer.solve() self.best_params_ = best_params + self.best_index_ = getattr(optimizer, "best_index_", None) + raw_best_score, best_metadata = experiment.evaluate(best_params) + self.best_score_ = float(raw_best_score) + results_table = best_metadata.get("results") if best_metadata else None + if results_table is not None: + try: + self.cv_results_ = results_table.copy() + except AttributeError: + self.cv_results_ = results_table + else: + self.cv_results_ = None self.best_forecaster_ = forecaster.set_params(**best_params) # Refit model with best parameters. if self.refit: + refit_start = time.perf_counter() self.best_forecaster_.fit(y=y, X=X, fh=fh) + self.refit_time_ = time.perf_counter() - refit_start + else: + self.refit_time_ = 0.0 return self + def _extend_to_all_scitypes(self, tagname): + """Ensure mtypes for all scitypes are present in tag ``tagname``.""" + if not _HAS_SKTIME: + return + + tagval = self.get_tag(tagname) + if not isinstance(tagval, list): + tagval = [tagval] + scitypes = mtype_to_scitype(tagval, return_unique=True) + + if "Series" not in scitypes: + tagval = tagval + ["pd.DataFrame"] + elif "pd.Series" in tagval and "pd.DataFrame" not in tagval: + tagval = ["pd.DataFrame"] + tagval + + if "Panel" not in scitypes: + tagval = tagval + ["pd-multiindex"] + if "Hierarchical" not in scitypes: + tagval = tagval + ["pd_multiindex_hier"] + + if self.tune_by_instance: + tagval = [x for x in tagval if mtype_to_scitype(x) == "Series"] + + self.set_tags(**{tagname: tagval}) + def _predict(self, fh, X): """Forecast time series at future horizon. diff --git a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py index eeed78d3..a421b32e 100644 --- a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py +++ b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py @@ -1,11 +1,18 @@ """Integration tests for sktime tuners.""" # copyright: hyperactive developers, MIT License (see LICENSE file) +import numpy as np import pytest from skbase.utils.dependencies import _check_soft_dependencies if _check_soft_dependencies("sktime", severity="none"): + from sktime.datasets import load_airline + from sktime.forecasting.naive import NaiveForecaster + from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError + from sktime.split import ExpandingWindowSplitter + from hyperactive.integrations.sktime import ForecastingOptCV, TSCOptCV + from hyperactive.opt import GridSearchSk EST_TO_TEST = [ForecastingOptCV, TSCOptCV] else: @@ -20,3 +27,47 @@ def test_sktime_estimator(estimator): check_estimator(estimator, raise_exceptions=True) # The above line collects all API conformance tests in sktime and runs them. # It will raise an error if the estimator is not API conformant. + + +@pytest.mark.skipif(not EST_TO_TEST, reason="sktime not installed") +def test_forecasting_opt_cv_sets_attributes(): + """ForecastingOptCV exposes useful attributes after fitting.""" + fh = [1, 2] + y = load_airline().iloc[:36] + cv = ExpandingWindowSplitter(initial_window=24, step_length=6, fh=fh) + optimizer = GridSearchSk(param_grid={"strategy": ["last", "mean"]}) + + tuner = ForecastingOptCV( + forecaster=NaiveForecaster(), + optimizer=optimizer, + cv=cv, + scoring=MeanAbsolutePercentageError(symmetric=True), + backend="None", + ) + + tuner.fit(y=y, fh=fh) + + assert tuner.scorer_.name == "MeanAbsolutePercentageError" + assert tuner.n_splits_ == cv.get_n_splits(y) + assert tuner.refit_time_ >= 0 + + metric_col = "test_" + tuner.scorer_.name + assert metric_col in tuner.cv_results_.columns + assert np.isclose(tuner.best_score_, tuner.cv_results_[metric_col].mean()) + + +@pytest.mark.skipif(not EST_TO_TEST, reason="sktime not installed") +def test_forecasting_opt_cv_tune_by_flags(): + """Tune-by flags should adjust estimator tags.""" + tuner = ForecastingOptCV( + forecaster=NaiveForecaster(), + optimizer=GridSearchSk(param_grid={"strategy": ["last"]}), + cv=ExpandingWindowSplitter(initial_window=5, step_length=1, fh=[1]), + tune_by_instance=True, + tune_by_variable=True, + ) + + assert tuner.get_tag("scitype:y") == "univariate" + y_mtypes = tuner.get_tag("y_inner_mtype") + assert "pd-multiindex" not in y_mtypes + assert "pd_multiindex_hier" not in y_mtypes From 89c01b48ecd094084b6140fb8e61051727573fde Mon Sep 17 00:00:00 2001 From: Anirudh Sengar Date: Wed, 26 Nov 2025 11:36:34 +0400 Subject: [PATCH 5/6] minor fixes Signed-off-by: Anirudh Sengar --- src/hyperactive/integrations/sktime/_forecasting.py | 6 +----- .../sktime/tests/test_sktime_estimators.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 7565177e..bbcbf8f6 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -8,13 +8,10 @@ _HAS_SKTIME = _check_soft_dependencies("sktime", severity="none") if _HAS_SKTIME: - from sktime.datatypes import mtype_to_scitype from sktime.forecasting.base._delegate import _DelegatedForecaster else: from skbase.base import BaseEstimator as _DelegatedForecaster - mtype_to_scitype = None - from hyperactive.experiment.integrations.sktime_forecasting import ( SktimeForecastingExperiment, ) @@ -335,8 +332,7 @@ def _fit(self, y, X, fh): def _extend_to_all_scitypes(self, tagname): """Ensure mtypes for all scitypes are present in tag ``tagname``.""" - if not _HAS_SKTIME: - return + from sktime.datatypes import mtype_to_scitype tagval = self.get_tag(tagname) if not isinstance(tagval, list): diff --git a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py index a421b32e..3444ae1d 100644 --- a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py +++ b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py @@ -29,7 +29,10 @@ def test_sktime_estimator(estimator): # It will raise an error if the estimator is not API conformant. -@pytest.mark.skipif(not EST_TO_TEST, reason="sktime not installed") +@pytest.mark.skipif( + not _check_soft_dependencies("sktime", severity="none"), + reason="sktime not installed", +) def test_forecasting_opt_cv_sets_attributes(): """ForecastingOptCV exposes useful attributes after fitting.""" fh = [1, 2] @@ -56,7 +59,10 @@ def test_forecasting_opt_cv_sets_attributes(): assert np.isclose(tuner.best_score_, tuner.cv_results_[metric_col].mean()) -@pytest.mark.skipif(not EST_TO_TEST, reason="sktime not installed") +@pytest.mark.skipif( + not _check_soft_dependencies("sktime", severity="none"), + reason="sktime not installed", +) def test_forecasting_opt_cv_tune_by_flags(): """Tune-by flags should adjust estimator tags.""" tuner = ForecastingOptCV( From c012b5111b7f2f82de1e8e0a8364bb60d9a84459 Mon Sep 17 00:00:00 2001 From: Anirudh Sengar Date: Fri, 28 Nov 2025 17:39:59 +0400 Subject: [PATCH 6/6] Docstring changes --- examples/integrations/README.md | 8 ------- .../integrations/sktime/_forecasting.py | 21 +++++++++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/examples/integrations/README.md b/examples/integrations/README.md index f3fc11dd..d049161b 100644 --- a/examples/integrations/README.md +++ b/examples/integrations/README.md @@ -99,14 +99,6 @@ y_pred = tuned_naive.predict() # 4. obtaining best parameters and best forecaster best_params = tuned_naive.best_params_ best_forecaster = tuned_naive.best_forecaster_ - -# Broadcasting options & diagnostics - -``ForecastingOptCV`` mirrors ``ForecastingGridSearchCV`` by exposing -``tune_by_instance`` and ``tune_by_variable`` flags for automatic sktime -broadcasting over panel or multivariate data. After ``fit`` completes you also -gain access to ``best_score_``, ``cv_results_`` (per-fold backtesting results), -``n_splits_``, ``scorer_``, and ``refit_time_`` for downstream inspection. ``` ## Integration with sktime - time series classification diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index bbcbf8f6..5f1be1a9 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -202,6 +202,27 @@ class ForecastingOptCV(_DelegatedForecaster): 3. obtaining best parameters and best forecaster >>> best_params = tuned_naive.best_params_ >>> best_forecaster = tuned_naive.best_forecaster_ + + Attributes + ---------- + best_params_ : dict + Best parameter values returned by the optimizer. + best_forecaster_ : estimator + Fitted estimator with the best parameters. + best_score_ : float + Score of the best model (according to ``scoring``, after hyperactive's + "higher-is-better" normalization). + best_index_ : int or None + Index of the best parameter combination if the optimizer exposes it. + scorer_ : BaseMetric + The scoring object resolved by ``check_scoring``. + n_splits_ : int or None + Number of splits produced by ``cv`` (if the splitter exposes it). + cv_results_ : pd.DataFrame or None + Evaluation table returned by ``sktime.evaluate`` for the winning parameters. + (Full per-candidate traces require the optimizer to provide detailed metadata.) + refit_time_ : float + Time in seconds to refit the best forecaster when ``refit=True``. """ _tags = {