diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5476c0cf..9e495401 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -108,6 +108,15 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: Free Disk Space (Ubuntu)
+        if: runner.os == 'Linux'
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
@@ -167,6 +176,15 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Free Disk Space (Ubuntu)
+        if: runner.os == 'Linux'
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+
       - name: Set up Python 3.11
         uses: actions/setup-python@v5
         with:
diff --git a/examples/skforecast/skforecast_example.py b/examples/skforecast/skforecast_example.py
new file mode 100644
index 00000000..9a39090b
--- /dev/null
+++ b/examples/skforecast/skforecast_example.py
@@ -0,0 +1,63 @@
+"""
+Skforecast Integration Example - Hyperparameter Tuning for Time Series Forecasting
+
+This example demonstrates how to use Hyperactive to tune hyperparameters of a
+skforecast ForecasterRecursive model. It uses the SkforecastOptCV class which
+provides a familiar sklearn-like API for integrating skforecast models with
+Hyperactive's optimization algorithms.
+
+Characteristics:
+- Integration with skforecast's backtesting functionality
+- Tuning of regressor hyperparameters (e.g., RandomForestRegressor)
+- Uses HillClimbing optimizer (can be swapped for any Hyperactive optimizer)
+- Time series cross-validation via backtesting
+"""
+
+import numpy as np
+import pandas as pd
+from skforecast.recursive import ForecasterRecursive
+from sklearn.ensemble import RandomForestRegressor
+from hyperactive.opt import HillClimbing
+from hyperactive.integrations.skforecast import SkforecastOptCV
+
+# Generate synthetic data
+data = pd.Series(
+    np.random.randn(100),
+    index=pd.date_range(start="2020-01-01", periods=100, freq="D"),
+    name="y",
+)
+
+# Define forecaster
+forecaster = ForecasterRecursive(
+    regressor=RandomForestRegressor(random_state=123), lags=5
+)
+
+# Define optimizer
+optimizer = HillClimbing(
+    search_space={
+        "n_estimators": list(range(10, 100, 10)),
+        "max_depth": list(range(2, 10)),
+    },
+    n_iter=10,
+)
+
+# Define SkforecastOptCV
+opt_cv = SkforecastOptCV(
+    forecaster=forecaster,
+    optimizer=optimizer,
+    steps=5,
+    metric="mean_squared_error",
+    initial_train_size=50,
+    verbose=True,
+)
+
+# Fit
+print("Fitting...")
+opt_cv.fit(y=data)
+
+# Predict
+print("Predicting...")
+predictions = opt_cv.predict(steps=5)
+print("Predictions:")
+print(predictions)
+print("Best params:", opt_cv.best_params_)
diff --git a/pyproject.toml b/pyproject.toml
index a11b720e..1f654d76 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,16 @@ sklearn-integration = [
 sktime-integration = [
     "skpro",
     'sktime; python_version < "3.14"',
+    'skforecast; python_version < "3.14"',
+]
+skforecast-integration = [
+    'skforecast; python_version < "3.14"',
+]
+integrations = [
+    "scikit-learn <1.8.0",
+    "skpro",
+    'sktime; python_version < "3.14"',
+    'skforecast; python_version < "3.14"',
 ]
 build = [
     "setuptools",
@@ -77,7 +87,6 @@ all_extras = [
     "lightning",
 ]
 
-
 [project.urls]
 "Homepage" = "https://github.com/SimonBlanke/Hyperactive"
 "Bug Reports" = "https://github.com/SimonBlanke/Hyperactive/issues"
diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py
index c302e25a..e7e5659b 100644
--- a/src/hyperactive/experiment/integrations/__init__.py
+++ b/src/hyperactive/experiment/integrations/__init__.py
@@ -1,6 +1,9 @@
 """Integrations with packages for tuning."""
 # copyright: hyperactive developers, MIT License (see LICENSE file)
 
+from hyperactive.experiment.integrations.skforecast_forecasting import (
+    SkforecastExperiment,
+)
 from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
 from hyperactive.experiment.integrations.skpro_probareg import (
     SkproProbaRegExperiment,
@@ -20,5 +23,6 @@
     "SkproProbaRegExperiment",
     "SktimeClassificationExperiment",
     "SktimeForecastingExperiment",
+    "SkforecastExperiment",
     "TorchExperiment",
 ]
diff --git a/src/hyperactive/experiment/integrations/skforecast_forecasting.py b/src/hyperactive/experiment/integrations/skforecast_forecasting.py
new file mode 100644
index 00000000..e6de8fb5
--- /dev/null
+++ b/src/hyperactive/experiment/integrations/skforecast_forecasting.py
@@ -0,0 +1,230 @@
+"""Experiment adapter for skforecast backtesting experiments."""
+# copyright: hyperactive developers, MIT License (see LICENSE file)
+
+import copy
+
+from hyperactive.base import BaseExperiment
+
+
+class SkforecastExperiment(BaseExperiment):
+    """Experiment adapter for skforecast backtesting experiments.
+
+    This class is used to perform backtesting experiments using a given
+    skforecast forecaster. It allows for hyperparameter tuning and evaluation of
+    the model's performance.
+
+    Parameters
+    ----------
+    forecaster : skforecast forecaster
+        skforecast forecaster to benchmark.
+
+    y : pandas Series
+        Target time series used in the evaluation experiment.
+
+    exog : pandas Series or DataFrame, default=None
+        Exogenous variable/s used in the evaluation experiment.
+
+    steps : int
+        Number of steps to predict.
+
+    metric : str or callable
+        Metric used to quantify the goodness of fit of the model.
+        If string, it must be a metric name allowed by skforecast
+        (e.g., 'mean_squared_error').
+        If callable, it must take (y_true, y_pred) and return a float.
+
+    initial_train_size : int
+        Number of samples in the initial training set.
+
+    refit : bool, default=False
+        Whether to re-fit the forecaster in each iteration.
+
+    fixed_train_size : bool, default=False
+        If True, the train size doesn't increase but moves by `steps` in each iteration.
+
+    gap : int, default=0
+        Number of samples to exclude from the end of each training set and the
+        start of the test set.
+
+    allow_incomplete_fold : bool, default=True
+        If True, the last fold is allowed to have fewer samples than `steps`.
+
+    return_best : bool, default=False
+        Stored on the instance but not used by the backtesting evaluation.
+
+    n_jobs : int or 'auto', default="auto"
+        Number of jobs to run in parallel.
+
+    verbose : bool, default=False
+        Print summary figures.
+
+    show_progress : bool, default=False
+        Whether to show a progress bar.
+
+    higher_is_better : bool, default=False
+        Whether higher metric values indicate better performance.
+        Set to False (default) for error metrics like MSE, MAE, MAPE where
+        lower values are better. Set to True for metrics like R2 where
+        higher values indicate better model performance.
+    """
+
+    _tags = {
+        "authors": ["Omswastik-11", "JoaquinAmatRodrigo"],
+        "maintainers": ["Omswastik-11", "fkiraly", "JoaquinAmatRodrigo", "SimonBlanke"],
+        "python_dependencies": "skforecast",
+    }
+
+    def __init__(
+        self,
+        forecaster,
+        y,
+        steps,
+        metric,
+        initial_train_size,
+        exog=None,
+        refit=False,
+        fixed_train_size=False,
+        gap=0,
+        allow_incomplete_fold=True,
+        return_best=False,
+        n_jobs="auto",
+        verbose=False,
+        show_progress=False,
+        higher_is_better=False,
+    ):
+        self.forecaster = forecaster
+        self.y = y
+        self.steps = steps
+        self.metric = metric
+        self.initial_train_size = initial_train_size
+        self.exog = exog
+        self.refit = refit
+        self.fixed_train_size = fixed_train_size
+        self.gap = gap
+        self.allow_incomplete_fold = allow_incomplete_fold
+        self.return_best = return_best
+        self.n_jobs = n_jobs
+        self.verbose = verbose
+        self.show_progress = show_progress
+        self.higher_is_better = higher_is_better
+
+        super().__init__()
+
+        # Set the optimization direction based on higher_is_better parameter
+        higher_or_lower = "higher" if higher_is_better else "lower"
+        self.set_tags(**{"property:higher_or_lower_is_better": higher_or_lower})
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the parameter set to return.
+
+        Returns
+        -------
+        params : dict or list of dict, default = {}
+            Parameters to create testing instances of the class
+            Each dict are parameters to construct an "interesting" test instance,
+            i.e., MyClass(**params) or MyClass(**params[i]) creates a valid test
+            instance.
+            create_test_instance uses the first (or only) dictionary in `params`
+        """
+        from skbase.utils.dependencies import _check_soft_dependencies
+
+        if not _check_soft_dependencies("skforecast", severity="none"):
+            return []
+
+        import numpy as np
+        import pandas as pd
+        from skforecast.recursive import ForecasterRecursive
+        from sklearn.ensemble import RandomForestRegressor
+
+        forecaster = ForecasterRecursive(
+            regressor=RandomForestRegressor(random_state=123),
+            lags=2,
+        )
+
+        y = pd.Series(
+            np.random.default_rng(123).standard_normal(20),
+            index=pd.date_range(start="2020-01-01", periods=20, freq="D"),
+            name="y",
+        )
+
+        params = {
+            "forecaster": forecaster,
+            "y": y,
+            "steps": 3,
+            "metric": "mean_squared_error",
+            "initial_train_size": 10,
+        }
+        return [params]
+
+    @classmethod
+    def _get_score_params(cls):
+        """Return settings for testing score/evaluate functions. Used in tests only.
+
+        Returns a list, the i-th element should be valid arguments for
+        self.evaluate and self.score, of an instance constructed with
+        self.get_test_params()[i].
+
+        Returns
+        -------
+        list of dict
+            The parameters to be used for scoring.
+        """
+        return [{"n_estimators": 5}]
+
+    def _evaluate(self, params):
+        """Evaluate the parameters.
+
+        Parameters
+        ----------
+        params : dict with string keys
+            Parameters to evaluate.
+
+        Returns
+        -------
+        float
+            The value of the parameters as per evaluation.
+        dict
+            Additional metadata about the search.
+        """
+        from skforecast.model_selection import TimeSeriesFold, backtesting_forecaster
+
+        forecaster = copy.deepcopy(self.forecaster)
+        forecaster.set_params(params)
+
+        cv = TimeSeriesFold(
+            steps=self.steps,
+            initial_train_size=self.initial_train_size,
+            refit=self.refit,
+            fixed_train_size=self.fixed_train_size,
+            gap=self.gap,
+            allow_incomplete_fold=self.allow_incomplete_fold,
+        )
+
+        results, _ = backtesting_forecaster(
+            forecaster=forecaster,
+            y=self.y,
+            cv=cv,
+            metric=self.metric,
+            exog=self.exog,
+            n_jobs=self.n_jobs,
+            verbose=self.verbose,
+            show_progress=self.show_progress,
+        )
+
+        if isinstance(self.metric, str):
+            metric_name = self.metric
+        else:
+            metric_name = (
+                self.metric.__name__ if hasattr(self.metric, "__name__") else "score"
+            )
+
+        # results is the metrics table; the score is read from its first row
+        res_float = results[metric_name].iloc[0]
+
+        return res_float, {"results": results}
diff --git a/src/hyperactive/integrations/skforecast/__init__.py b/src/hyperactive/integrations/skforecast/__init__.py
new file mode 100644
index 00000000..99eb49ce
--- /dev/null
+++ b/src/hyperactive/integrations/skforecast/__init__.py
@@ -0,0 +1,6 @@
+"""Skforecast integration package."""
+# copyright: hyperactive developers, MIT License (see LICENSE file)
+
+from hyperactive.integrations.skforecast.skforecast_opt_cv import SkforecastOptCV
+
+__all__ = ["SkforecastOptCV"]
diff --git a/src/hyperactive/integrations/skforecast/skforecast_opt_cv.py b/src/hyperactive/integrations/skforecast/skforecast_opt_cv.py
new file mode 100644
index 00000000..56ee0351
--- /dev/null
+++ b/src/hyperactive/integrations/skforecast/skforecast_opt_cv.py
@@ -0,0 +1,240 @@
+"""Skforecast integration for hyperactive."""
+# copyright: hyperactive developers, MIT License (see LICENSE file)
+
+import copy
+
+from sklearn.base import BaseEstimator
+
+from hyperactive.experiment.integrations.skforecast_forecasting import (
+    SkforecastExperiment,
+)
+
+
+class SkforecastOptCV(BaseEstimator):
+    """Tune a skforecast forecaster via any optimizer in the hyperactive toolbox.
+
+    Parameters
+    ----------
+    forecaster : skforecast forecaster
+        The forecaster to tune.
+
+    optimizer : hyperactive BaseOptimizer
+        The optimizer to be used for hyperparameter search.
+
+    steps : int
+        Number of steps to predict.
+
+    metric : str or callable
+        Metric used to quantify the goodness of fit of the model.
+        If string, it must be a metric name allowed by skforecast
+        (e.g., 'mean_squared_error').
+        If callable, it must take (y_true, y_pred) and return a float.
+
+    initial_train_size : int
+        Number of samples in the initial training set.
+
+    exog : pandas Series or DataFrame, default=None
+        Exogenous variable/s used in the evaluation experiment.
+
+    refit : bool, default=False
+        Whether to re-fit the forecaster in each iteration.
+
+    fixed_train_size : bool, default=False
+        If True, the train size doesn't increase but moves by `steps` in each iteration.
+
+    gap : int, default=0
+        Number of samples to exclude from the end of each training set and the
+        start of the test set.
+
+    allow_incomplete_fold : bool, default=True
+        If True, the last fold is allowed to have fewer samples than `steps`.
+
+    return_best : bool, default=False
+        Forwarded to the experiment, which currently does not use it.
+
+    n_jobs : int or 'auto', default="auto"
+        Number of jobs to run in parallel.
+
+    verbose : bool, default=False
+        Print summary figures.
+
+    show_progress : bool, default=False
+        Whether to show a progress bar.
+
+    higher_is_better : bool, default=False
+        Whether higher metric values indicate better performance.
+        Set to False (default) for error metrics like MSE, MAE, MAPE where
+        lower values are better. Set to True for metrics like R2 where
+        higher values indicate better model performance.
+    """
+
+    _tags = {
+        "authors": ["Omswastik-11", "JoaquinAmatRodrigo"],
+        "maintainers": ["Omswastik-11", "fkiraly", "JoaquinAmatRodrigo", "SimonBlanke"],
+        "python_dependencies": "skforecast",
+    }
+
+    def __init__(
+        self,
+        forecaster,
+        optimizer,
+        steps,
+        metric,
+        initial_train_size,
+        exog=None,
+        refit=False,
+        fixed_train_size=False,
+        gap=0,
+        allow_incomplete_fold=True,
+        return_best=False,
+        n_jobs="auto",
+        verbose=False,
+        show_progress=False,
+        higher_is_better=False,
+    ):
+        self.forecaster = forecaster
+        self.optimizer = optimizer
+        self.steps = steps
+        self.metric = metric
+        self.initial_train_size = initial_train_size
+        self.exog = exog
+        self.refit = refit
+        self.fixed_train_size = fixed_train_size
+        self.gap = gap
+        self.allow_incomplete_fold = allow_incomplete_fold
+        self.return_best = return_best
+        self.n_jobs = n_jobs
+        self.verbose = verbose
+        self.show_progress = show_progress
+        self.higher_is_better = higher_is_better
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the parameter set to return.
+
+        Returns
+        -------
+        params : dict or list of dict, default = {}
+            Parameters to create testing instances of the class
+            Each dict are parameters to construct an "interesting" test instance,
+            i.e., MyClass(**params) or MyClass(**params[i]) creates a valid test
+            instance.
+            create_test_instance uses the first (or only) dictionary in `params`
+        """
+        from skbase.utils.dependencies import _check_soft_dependencies
+
+        if not _check_soft_dependencies("skforecast", severity="none"):
+            return []
+
+        from skforecast.recursive import ForecasterRecursive
+        from sklearn.ensemble import RandomForestRegressor
+
+        from hyperactive.opt import HillClimbing
+
+        forecaster = ForecasterRecursive(
+            regressor=RandomForestRegressor(random_state=123),
+            lags=2,
+        )
+        # The optimizer tunes the wrapped regressor's hyperparameters, so the
+        # search space is keyed by RandomForestRegressor parameter names.
+        optimizer = HillClimbing(
+            search_space={
+                "n_estimators": [10, 20],
+                "max_depth": [2, 5],
+            },
+            n_iter=2,
+        )
+
+        params = {
+            "forecaster": forecaster,
+            "optimizer": optimizer,
+            "steps": 3,
+            "metric": "mean_squared_error",
+            "initial_train_size": 10,
+        }
+        return [params]
+
+    def fit(self, y, exog=None):
+        """Fit to training data.
+
+        Parameters
+        ----------
+        y : pandas Series
+            Target time series to which to fit the forecaster.
+        exog : pandas Series or DataFrame, optional
+            Exogenous variables. If None, ``self.exog`` is used instead.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        current_exog = exog if exog is not None else self.exog
+
+        experiment = SkforecastExperiment(
+            forecaster=self.forecaster,
+            y=y,
+            steps=self.steps,
+            metric=self.metric,
+            initial_train_size=self.initial_train_size,
+            exog=current_exog,
+            refit=self.refit,
+            fixed_train_size=self.fixed_train_size,
+            gap=self.gap,
+            allow_incomplete_fold=self.allow_incomplete_fold,
+            return_best=self.return_best,
+            n_jobs=self.n_jobs,
+            verbose=self.verbose,
+            show_progress=self.show_progress,
+            higher_is_better=self.higher_is_better,
+        )
+
+        if hasattr(self.optimizer, "clone"):
+            optimizer = self.optimizer.clone()
+        else:
+            optimizer = copy.deepcopy(self.optimizer)
+
+        optimizer.set_params(experiment=experiment)
+        best_params = optimizer.solve()
+
+        self.best_params_ = best_params
+        self.best_forecaster_ = copy.deepcopy(self.forecaster)
+        self.best_forecaster_.set_params(best_params)
+
+        # Refit model with best parameters on the whole dataset
+        self.best_forecaster_.fit(y=y, exog=current_exog)
+
+        return self
+
+    def predict(self, steps, exog=None, **kwargs):
+        """Forecast time series at future horizon.
+
+        Parameters
+        ----------
+        steps : int
+            Number of steps to predict.
+        exog : pandas Series or DataFrame, optional
+            Exogenous variables.
+        **kwargs
+            Extra keyword arguments forwarded to the fitted forecaster's
+            ``predict`` method.
+
+        Returns
+        -------
+        predictions : pandas Series
+            Predicted values.
+
+        Raises
+        ------
+        sklearn.exceptions.NotFittedError
+            If ``fit`` has not been called on this instance.
+        """
+        from sklearn.utils.validation import check_is_fitted
+
+        # Give a clear NotFittedError rather than a bare AttributeError.
+        check_is_fitted(self, "best_forecaster_")
+        return self.best_forecaster_.predict(steps=steps, exog=exog, **kwargs)
diff --git a/src/hyperactive/integrations/skforecast/tests/test_skforecast.py b/src/hyperactive/integrations/skforecast/tests/test_skforecast.py
new file mode 100644
index 00000000..ec8b5c6a
--- /dev/null
+++ b/src/hyperactive/integrations/skforecast/tests/test_skforecast.py
@@ -0,0 +1,58 @@
+"""Test skforecast integration."""
+
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.ensemble import RandomForestRegressor
+
+from hyperactive.integrations.skforecast import SkforecastOptCV
+from hyperactive.opt import HillClimbing
+
+try:
+    from skforecast.recursive import ForecasterRecursive
+except ImportError:
+    pass
+
+
+@pytest.fixture
+def data():
+    """Create test data."""
+    return pd.Series(
+        np.random.randn(100),
+        index=pd.date_range(start="2020-01-01", periods=100, freq="D"),
+        name="y",
+    )
+
+
+def test_skforecast_opt_cv(data):
+    """Test SkforecastOptCV."""
+    pytest.importorskip("skforecast")
+
+    forecaster = ForecasterRecursive(
+        regressor=RandomForestRegressor(random_state=123), lags=5
+    )
+
+    optimizer = HillClimbing(
+        search_space={
+            "n_estimators": [10, 20],
+            "max_depth": [2, 5],
+        },
+        n_iter=2,
+    )
+
+    opt_cv = SkforecastOptCV(
+        forecaster=forecaster,
+        optimizer=optimizer,
+        steps=5,
+        metric="mean_squared_error",
+        initial_train_size=50,
+        verbose=False,
+    )
+
+    opt_cv.fit(y=data)
+    predictions = opt_cv.predict(steps=5)
+
+    assert len(predictions) == 5
+    assert isinstance(predictions, pd.Series)
+    assert "n_estimators" in opt_cv.best_params_
+    assert "max_depth" in opt_cv.best_params_