8 changes: 8 additions & 0 deletions examples/integrations/README.md
@@ -99,6 +99,14 @@ y_pred = tuned_naive.predict()
# 4. obtaining best parameters and best forecaster
best_params = tuned_naive.best_params_
best_forecaster = tuned_naive.best_forecaster_

```

### Broadcasting options & diagnostics

``ForecastingOptCV`` mirrors ``ForecastingGridSearchCV`` by exposing
``tune_by_instance`` and ``tune_by_variable`` flags for automatic sktime
broadcasting over panel or multivariate data. After ``fit`` completes, you also
gain access to ``best_score_``, ``cv_results_`` (per-fold backtesting results),
``n_splits_``, ``scorer_``, and ``refit_time_`` for downstream inspection.
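
A minimal usage sketch, assembled from the test setup added in this pull request (data, splitter, and grid choices are illustrative only):

```python
from sktime.datasets import load_airline
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.split import ExpandingWindowSplitter

from hyperactive.integrations.sktime import ForecastingOptCV
from hyperactive.opt import GridSearchSk

fh = [1, 2]
y = load_airline().iloc[:36]

tuner = ForecastingOptCV(
    forecaster=NaiveForecaster(),
    optimizer=GridSearchSk(param_grid={"strategy": ["last", "mean"]}),
    cv=ExpandingWindowSplitter(initial_window=24, step_length=6, fh=fh),
    scoring=MeanAbsolutePercentageError(symmetric=True),
    tune_by_instance=False,  # True: tune each panel/hierarchy instance separately
    tune_by_variable=False,  # True: tune each variable of a multivariate y separately
)
tuner.fit(y=y, fh=fh)

# diagnostics populated during fit
print(tuner.best_params_)        # best parameter setting found
print(tuner.best_score_)         # mean backtesting score of the best setting
print(tuner.n_splits_)           # number of CV folds used
print(tuner.refit_time_)         # seconds spent refitting best_forecaster_
print(tuner.cv_results_.head())  # per-fold backtesting results (DataFrame)
```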

## Integration with sktime - time series classification
78 changes: 77 additions & 1 deletion src/hyperactive/integrations/sktime/_forecasting.py
@@ -1,9 +1,13 @@
# copyright: hyperactive developers, MIT License (see LICENSE file)

import time

import numpy as np
from skbase.utils.dependencies import _check_soft_dependencies

_HAS_SKTIME = _check_soft_dependencies("sktime", severity="none")

if _HAS_SKTIME:
from sktime.forecasting.base._delegate import _DelegatedForecaster
else:
from skbase.base import BaseEstimator as _DelegatedForecaster
@@ -151,6 +155,15 @@ class ForecastingOptCV(_DelegatedForecaster):
- "logger_name": str, default="ray"; name of the logger to use.
- "mute_warnings": bool, default=False; if True, suppresses warnings

tune_by_instance : bool, optional (default=False)
Whether to tune parameters separately for each time series instance when
panel or hierarchical data is passed. Mirrors ``ForecastingGridSearchCV``
semantics by delegating broadcasting to sktime's vectorization logic.
tune_by_variable : bool, optional (default=False)
Whether to tune parameters per variable for strictly multivariate series.
When enabled, the inner target type is set to univariate (``scitype:y``), so
sktime's broadcasting fits and tunes each variable separately.

Example
-------
Any available tuning engine from hyperactive can be used, for example:
@@ -215,6 +228,8 @@ def __init__(
cv_X=None,
backend=None,
backend_params=None,
tune_by_instance=False,
tune_by_variable=False,
):
self.forecaster = forecaster
self.optimizer = optimizer
@@ -227,8 +242,20 @@ def __init__(
self.cv_X = cv_X
self.backend = backend
self.backend_params = backend_params
self.tune_by_instance = tune_by_instance
self.tune_by_variable = tune_by_variable
super().__init__()

if _HAS_SKTIME:
self._set_delegated_tags(delegate=self.forecaster)
tags_to_clone = ["y_inner_mtype", "X_inner_mtype"]
self.clone_tags(self.forecaster, tags_to_clone)
self._extend_to_all_scitypes("y_inner_mtype")
self._extend_to_all_scitypes("X_inner_mtype")

if self.tune_by_variable:
self.set_tags(**{"scitype:y": "univariate"})

def _fit(self, y, X, fh):
"""Fit to training data.

@@ -250,6 +277,16 @@ def _fit(self, y, X, fh):
forecaster = self.forecaster.clone()

scoring = check_scoring(self.scoring, obj=self)
self.scorer_ = scoring
get_n_splits = getattr(self.cv, "get_n_splits", None)
if callable(get_n_splits):
try:
self.n_splits_ = get_n_splits(y)
except TypeError:
# fallback for splitters that expect no args
self.n_splits_ = get_n_splits()
else:
self.n_splits_ = None
# scoring_name = f"test_{scoring.name}"

experiment = SktimeForecastingExperiment(
@@ -270,14 +307,53 @@ best_params = optimizer.solve()
best_params = optimizer.solve()

self.best_params_ = best_params
self.best_index_ = getattr(optimizer, "best_index_", None)
raw_best_score, best_metadata = experiment.evaluate(best_params)
self.best_score_ = float(raw_best_score)
results_table = best_metadata.get("results") if best_metadata else None
if results_table is not None:
try:
self.cv_results_ = results_table.copy()
except AttributeError:
self.cv_results_ = results_table
else:
self.cv_results_ = None
self.best_forecaster_ = forecaster.set_params(**best_params)

# Refit model with best parameters.
if self.refit:
refit_start = time.perf_counter()
self.best_forecaster_.fit(y=y, X=X, fh=fh)
self.refit_time_ = time.perf_counter() - refit_start
else:
self.refit_time_ = 0.0

return self

def _extend_to_all_scitypes(self, tagname):
"""Ensure mtypes for all scitypes are present in tag ``tagname``."""
from sktime.datatypes import mtype_to_scitype

tagval = self.get_tag(tagname)
if not isinstance(tagval, list):
tagval = [tagval]
scitypes = mtype_to_scitype(tagval, return_unique=True)

if "Series" not in scitypes:
tagval = tagval + ["pd.DataFrame"]
elif "pd.Series" in tagval and "pd.DataFrame" not in tagval:
tagval = ["pd.DataFrame"] + tagval

if "Panel" not in scitypes:
tagval = tagval + ["pd-multiindex"]
if "Hierarchical" not in scitypes:
tagval = tagval + ["pd_multiindex_hier"]

if self.tune_by_instance:
tagval = [x for x in tagval if mtype_to_scitype(x) == "Series"]

self.set_tags(**{tagname: tagval})

def _predict(self, fh, X):
"""Forecast time series at future horizon.

@@ -1,11 +1,18 @@
"""Integration tests for sktime tuners."""
# copyright: hyperactive developers, MIT License (see LICENSE file)

import numpy as np
import pytest
from skbase.utils.dependencies import _check_soft_dependencies

if _check_soft_dependencies("sktime", severity="none"):
from sktime.datasets import load_airline
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.split import ExpandingWindowSplitter

from hyperactive.integrations.sktime import ForecastingOptCV, TSCOptCV
from hyperactive.opt import GridSearchSk

EST_TO_TEST = [ForecastingOptCV, TSCOptCV]
else:
@@ -20,3 +27,53 @@ def test_sktime_estimator(estimator):
check_estimator(estimator, raise_exceptions=True)
# The above line collects all API conformance tests in sktime and runs them.
# It will raise an error if the estimator is not API conformant.


@pytest.mark.skipif(
not _check_soft_dependencies("sktime", severity="none"),
reason="sktime not installed",
)
def test_forecasting_opt_cv_sets_attributes():
"""ForecastingOptCV exposes useful attributes after fitting."""
fh = [1, 2]
y = load_airline().iloc[:36]
cv = ExpandingWindowSplitter(initial_window=24, step_length=6, fh=fh)
optimizer = GridSearchSk(param_grid={"strategy": ["last", "mean"]})

tuner = ForecastingOptCV(
forecaster=NaiveForecaster(),
optimizer=optimizer,
cv=cv,
scoring=MeanAbsolutePercentageError(symmetric=True),
backend="None",
)

tuner.fit(y=y, fh=fh)

assert tuner.scorer_.name == "MeanAbsolutePercentageError"
assert tuner.n_splits_ == cv.get_n_splits(y)
assert tuner.refit_time_ >= 0

metric_col = "test_" + tuner.scorer_.name
assert metric_col in tuner.cv_results_.columns
assert np.isclose(tuner.best_score_, tuner.cv_results_[metric_col].mean())


@pytest.mark.skipif(
not _check_soft_dependencies("sktime", severity="none"),
reason="sktime not installed",
)
def test_forecasting_opt_cv_tune_by_flags():
"""Tune-by flags should adjust estimator tags."""
tuner = ForecastingOptCV(
forecaster=NaiveForecaster(),
optimizer=GridSearchSk(param_grid={"strategy": ["last"]}),
cv=ExpandingWindowSplitter(initial_window=5, step_length=1, fh=[1]),
tune_by_instance=True,
tune_by_variable=True,
)

assert tuner.get_tag("scitype:y") == "univariate"
y_mtypes = tuner.get_tag("y_inner_mtype")
assert "pd-multiindex" not in y_mtypes
assert "pd_multiindex_hier" not in y_mtypes