69 changes: 65 additions & 4 deletions src/hyperactive/opt/_adapters/_gfo.py
@@ -23,11 +23,14 @@ class _BaseGFOadapter(BaseOptimizer):
_tags = {
"authors": "SimonBlanke",
"python_dependencies": ["gradient-free-optimizers>=1.5.0"],
"capability:categorical": "encoded",
}

def __init__(self):
super().__init__()

self._categorical_mappings = {}

if self.initialize is None:
self._initialize = {"grid": 4, "random": 2, "vertices": 4}
else:
@@ -87,8 +90,18 @@ def _handle_gfo_defaults(self, search_config):
def _to_dict_np(self, search_space):
"""Coerce the search space to a format suitable for gfo optimizers.

gfo expects dicts of numpy arrays, not lists.
This method coerces lists or tuples in the search space to numpy arrays.
gfo expects dicts of numpy arrays, not lists. This method coerces
lists or tuples in the search space to numpy arrays.

In addition, this handles categorical dimensions by encoding them to
consecutive integers, while keeping track of the original levels in
``self._categorical_mappings`` for decoding during evaluation and when
returning ``best_params_``. A dimension is treated as categorical if
its dtype is non-numeric (string, object, or boolean).

Note: ``np.unique`` sorts values, so the encoding is not order-preserving.
For example, ``["rbf", "linear"]`` becomes ``["linear", "rbf"]`` with
indices ``[1, 0]``.

Parameters
----------
@@ -108,9 +121,50 @@ def coerce_to_numpy(arr):
return np.array(arr)
return arr

coerced_search_space = {k: coerce_to_numpy(v) for k, v in search_space.items()}
self._categorical_mappings = {}
coerced_search_space = {}

for key, value in search_space.items():
arr = coerce_to_numpy(value)

if arr.dtype.kind in ("O", "U", "S", "b"):
unique_vals, inverse = np.unique(arr, return_inverse=True)
self._categorical_mappings[key] = list(unique_vals)
coerced_search_space[key] = inverse.astype(int)
else:
coerced_search_space[key] = arr

return coerced_search_space
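
For reference, a minimal standalone sketch of the np.unique-based encoding described in the docstring note above; kernel_dim is an illustrative input, and the two outputs correspond to what _to_dict_np keeps in self._categorical_mappings and what it hands to the GFO backend:

import numpy as np

# Sketch only: mirrors the encoding logic of _to_dict_np for one dimension.
kernel_dim = np.array(["rbf", "linear"])

# np.unique returns the levels sorted, plus the index of each original value
# within that sorted level list.
levels, encoded = np.unique(kernel_dim, return_inverse=True)

print(levels.tolist())      # ['linear', 'rbf'] -> kept as the decoding table
print(encoded.astype(int))  # [1 0]             -> integer dimension given to GFO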

def _decode_categoricals(self, params):
"""Decode integer-encoded categoricals back to original levels.

Parameters
----------
params : dict
Parameter dict as used inside the optimizer/backend.

Returns
-------
dict
Parameter dict with any encoded categoricals mapped back to their
original values, if mappings are present.
"""
if not self._categorical_mappings:
return params

decoded = dict(params)
for key, categories in self._categorical_mappings.items():
if key not in decoded:
continue
try:
idx = int(decoded[key])
except (TypeError, ValueError):
continue
if 0 <= idx < len(categories):
decoded[key] = categories[idx]
return decoded
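
The decode step can be pictured in isolation as follows; the mapping and the params dict are hypothetical values of the kind produced by _to_dict_np and the GFO backend, respectively (a sketch of the loop above, not a call into the adapter):

# Sketch only: mirrors _decode_categoricals on hand-written inputs.
categorical_mappings = {"kernel": ["linear", "rbf"]}
params = {"C": 1.0, "kernel": 1}  # integer-encoded, as seen by the backend

decoded = dict(params)
for key, categories in categorical_mappings.items():
    idx = int(decoded[key])
    if 0 <= idx < len(categories):
        decoded[key] = categories[idx]

print(decoded)  # {'C': 1.0, 'kernel': 'rbf'}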

def _solve(self, experiment, **search_config):
"""Run the optimization search process.

@@ -133,13 +187,20 @@ def _solve(self, experiment, **search_config):
gfo_cls = self._get_gfo_class()
gfopt = gfo_cls(**search_config)

def _objective(params):
decoded_params = self._decode_categoricals(params)
score, _ = experiment.score(decoded_params)
return score

with StdoutMute(active=not self.verbose):
gfopt.search(
objective_function=experiment.score,
objective_function=_objective,
n_iter=n_iter,
max_time=max_time,
)

best_params = gfopt.best_para
best_params = self._decode_categoricals(best_params)
return best_params

@classmethod
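
Taken together, these changes let a GFO-backed optimizer accept a categorical dimension directly and return decoded values in best_params_. A usage sketch, assuming HillClimbing is one of the GFO-backed optimizers exported from hyperactive.opt and mirroring the API exercised in the tests below:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

from hyperactive.experiment.integrations import SklearnCvExperiment
from hyperactive.opt import HillClimbing  # assumed GFO-backed optimizer

X, y = load_iris(return_X_y=True)
experiment = SklearnCvExperiment(estimator=SVC(), X=X, y=y)

search_space = {
    "C": np.array([0.01, 0.1, 1.0, 10.0]),
    "kernel": np.array(["linear", "rbf"]),  # categorical, encoded internally
}

optimizer = HillClimbing(
    search_space=search_space, n_iter=20, experiment=experiment
)
optimizer.solve()

# best_params_ contains the original string level, not the integer code,
# e.g. {'C': 1.0, 'kernel': 'rbf'}
print(optimizer.best_params_)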
47 changes: 40 additions & 7 deletions src/hyperactive/tests/test_all_objects.py
@@ -14,7 +14,6 @@
# default is False, can be set to True by pytest --only_changed_modules True flag
ONLY_CHANGED_MODULES = False


class PackageConfig:
"""Contains package config variables for test classes."""

@@ -50,9 +49,10 @@ class PackageConfig:
"info:local_vs_global", # "local", "mixed", "global"
"info:explore_vs_exploit", # "explore", "exploit", "mixed"
"info:compute", # "low", "middle", "high"
# capabilities
"capability:categorical",
]


class BaseFixtureGenerator(PackageConfig, _BaseFixtureGenerator):
"""Fixture generator for base testing functionality in sktime.

@@ -134,7 +134,6 @@ def softdeps_present(obj):
# which sequence the conditional fixtures are generated in
fixture_sequence = ["object_class", "object_instance"]


class TestAllObjects(BaseFixtureGenerator, _TestAllObjects):
"""Generic tests for all objects in the package."""

@@ -167,7 +166,6 @@ def test_valid_object_tags(self, object_instance):

super().test_valid_object_class_tags(object_instance)


class ExperimentFixtureGenerator(BaseFixtureGenerator):
"""Fixture generator for experiments.

@@ -182,7 +180,6 @@ class ExperimentFixtureGenerator(BaseFixtureGenerator):

object_type_filter = "experiment"


class TestAllExperiments(ExperimentFixtureGenerator, _QuickTester):
"""Module level tests for all experiment classes."""

@@ -238,7 +235,6 @@ def test_score_function(self, object_class):
elif sign_tag == "lower" and det_tag == "deterministic":
assert score == -e_score


class OptimizerFixtureGenerator(BaseFixtureGenerator):
"""Fixture generator for optimizers.

@@ -253,7 +249,6 @@ class OptimizerFixtureGenerator(BaseFixtureGenerator):

object_type_filter = "optimizer"


class TestAllOptimizers(OptimizerFixtureGenerator, _QuickTester):
"""Module level tests for all optimizer classes."""

@@ -349,6 +344,43 @@ def test_gfo_integration(self, object_instance):
assert "C" in best_params, "Best parameters should contain 'C'"
assert "gamma" in best_params, "Best parameters should contain 'gamma'"

def test_gfo_categorical_encoding(self, object_instance):
"""GFO optimizers should handle categoricals via internal encoding."""
from hyperactive.opt._adapters._gfo import _BaseGFOadapter

if not isinstance(object_instance, _BaseGFOadapter):
return None

import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

from hyperactive.experiment.integrations import SklearnCvExperiment

X, y = load_iris(return_X_y=True)
sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)

search_space = {
"C": np.array([0.1, 1.0]),
"kernel": np.array(["linear", "rbf"]),
}
_config = {
"search_space": search_space,
"n_iter": 5,
"experiment": sklearn_exp,
}
optimizer = object_instance.clone().set_params(**_config)
optimizer.solve()
best_params = optimizer.best_params_

assert isinstance(best_params, dict)
assert "kernel" in best_params
assert best_params["kernel"] in {"linear", "rbf"}

# Verify internal categorical mappings were populated correctly
assert "kernel" in optimizer._categorical_mappings
assert set(optimizer._categorical_mappings["kernel"]) == {"linear", "rbf"}

def test_selection_direction_backend(self, object_instance):
"""Backends return argmax over standardized scores on controlled setup.

@@ -462,3 +494,4 @@ def _assert_good(best_params):

# For other backends, no-op here; targeted direction tests live elsewhere
return None