diff --git a/bayes_optim/acquisition/acquisition_fun.py b/bayes_optim/acquisition/acquisition_fun.py index 9f4ee80..ade18bb 100644 --- a/bayes_optim/acquisition/acquisition_fun.py +++ b/bayes_optim/acquisition/acquisition_fun.py @@ -4,9 +4,9 @@ from typing import Tuple, Union import numpy as np +from bayes_optim.mylogging import eprintf from numpy import exp, sqrt from scipy.stats import norm -from bayes_optim.mylogging import eprintf from ..surrogate import GaussianProcess, RandomForest @@ -156,23 +156,29 @@ def __call__( ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: X = self.check_X(X) n_sample = X.shape[0] - y_hat, sd = self._predict(X) + xi = 0.0 + if return_dx: + mu, std, mu_grad, std_grad = self._model.predict( + X, return_std=True, return_mean_grad=True, return_std_grad=True + ) + else: + mu, std = self._model.predict(X, return_std=True) # if the Kriging variance is to small # TODO: check the rationale of 1e-6 and why the ratio if intended if hasattr(self._model, "sigma2"): - if sd / np.sqrt(self._model.sigma2) < 1e-6: + if std / np.sqrt(self._model.sigma2) < 1e-6: return (0, np.zeros((len(X[0]), 1))) if return_dx else 0 else: # TODO: implement a counterpart of 'sigma2' for randomforest # or simply put a try...except around the code below - if sd < 1e-10: + if std < 1e-10: return (0, np.zeros((len(X[0]), 1))) if return_dx else 0 try: - xcr_ = self.plugin - y_hat - xcr = xcr_ / sd - xcr_prob, xcr_dens = norm.cdf(xcr), norm.pdf(xcr) - value = xcr_ * xcr_prob + sd * xcr_dens + impr = self.plugin - xi - mu + xcr = impr / std + cdf, pdf = norm.cdf(xcr), norm.pdf(xcr) + value = impr * cdf + std * pdf if n_sample == 1: value = sum(value) except Exception: # in case of numerical errors @@ -180,10 +186,14 @@ def __call__( value = 0 if return_dx: - y_dx, sd2_dx = self._gradient(X) - sd_dx = sd2_dx / (2.0 * sd) try: - dx = -y_dx * xcr_prob + sd_dx * xcr_dens + improve_grad = -mu_grad * std - std_grad * impr + improve_grad /= std**2 + cdf_grad = improve_grad * pdf + pdf_grad = -impr * cdf_grad + exploit_grad = -mu_grad * cdf - pdf_grad + explore_grad = std_grad * pdf + pdf_grad + dx = exploit_grad + explore_grad except Exception: dx = np.zeros((len(X[0]), 1)) return value, dx diff --git a/bayes_optim/acquisition/optim/__init__.py b/bayes_optim/acquisition/optim/__init__.py index d55d253..67376cd 100644 --- a/bayes_optim/acquisition/optim/__init__.py +++ b/bayes_optim/acquisition/optim/__init__.py @@ -1,5 +1,6 @@ import logging import time +from abc import ABC, abstractmethod from typing import Callable, List, Tuple import numpy as np @@ -19,6 +20,7 @@ "default_AQ_max_FEs", "default_AQ_n_restart", "default_AQ_wait_iter", + "OptimizationListener", ] @@ -52,9 +54,16 @@ def __call__(self, x: np.ndarray) -> Tuple[float, np.ndarray]: return f, fg +class OptimizationListener(ABC): + @abstractmethod + def on_optimum_found(self, fopt, xopt): + pass + + def argmax_restart( obj_func: Callable, search_space, + obj_func_: Callable = None, h: Callable = None, g: Callable = None, eval_budget: int = 100, @@ -62,6 +71,7 @@ def argmax_restart( wait_iter: int = 3, optimizer: str = "BFGS", logger: logging.Logger = None, + listener: OptimizationListener = None, ): # lists of the best solutions and acquisition values from each restart xopt, fopt = [], [] @@ -71,16 +81,25 @@ def argmax_restart( optimizer = "MIES" logger.warning("L-BFGS-B can only be applied on continuous search space") + X = search_space.sample(int(2e2 * search_space.dim), method="uniform") + if obj_func_ is not None: + af_value = 
[obj_func_(x) for x in X] + else: + af_value = [obj_func(x)[0] for x in X] + + idx = np.argsort(af_value)[::-1] + X = X[idx, :] + for iteration in range(n_restart): - x0 = search_space.sample(N=1, method="uniform")[0] + x0 = X[iteration] if optimizer == "BFGS": bounds = np.array(search_space.bounds) # TODO: this is still subject to testing xopt_, fopt_, stop_dict = fmin_l_bfgs_b( Penalized(obj_func, h, g), x0, - pgtol=1e-8, - factr=1e6, + factr=1e5, + approx_grad=False, bounds=bounds, maxfun=eval_budget, ) @@ -90,7 +109,7 @@ def argmax_restart( fopt_ = -fopt_ if logger is not None and stop_dict["warnflag"] != 0: - logger.debug("L-BFGS-B terminated abnormally with the state: %s" % stop_dict) + logger.info("L-BFGS-B terminated abnormally with the state: %s" % stop_dict) elif optimizer == "OnePlusOne_Cholesky_CMA": opt = OnePlusOne_Cholesky_CMA( @@ -132,9 +151,8 @@ def argmax_restart( wait_count = 0 if logger is not None: - logger.debug( - "restart : %d - funcalls : %d - Fopt : %f" - % (iteration + 1, stop_dict["funcalls"], fopt_) + logger.info( + f"restart : {iteration + 1} - funcalls : {stop_dict['funcalls']} - Fopt : {fopt_:.8e}" ) else: wait_count += 1 @@ -143,7 +161,11 @@ def argmax_restart( xopt.append(xopt_) fopt.append(fopt_) - if eval_budget <= 0 or wait_count >= wait_iter: + if listener is not None: + listener.on_optimum_found(fopt_, xopt_) + + if eval_budget <= 0: + # or wait_count >= wait_iter: break if len(xopt) == 0: diff --git a/bayes_optim/acquisition/optim/one_plus_one_cma_es.py b/bayes_optim/acquisition/optim/one_plus_one_cma_es.py index c8295f8..b4cbc55 100644 --- a/bayes_optim/acquisition/optim/one_plus_one_cma_es.py +++ b/bayes_optim/acquisition/optim/one_plus_one_cma_es.py @@ -7,6 +7,8 @@ from ...search_space import RealSpace, SearchSpace from ...utils import dynamic_penalty, get_logger, handle_box_constraint, set_bounds +from bayes_optim.mylogging import eprintf +import random Vector = List[float] Matrix = List[Vector] @@ -182,6 +184,7 @@ def random_seed(self, seed): self._random_seed = int(seed) if self._random_seed: np.random.seed(self._random_seed) + random.seed(self._random_seed) @property def C(self): @@ -213,7 +216,7 @@ def x(self, x): else: # sample `x` u.a.r. 
in `[lb, ub]` assert all(~np.isinf(self.lb)) & all(~np.isinf(self.ub)) - x = (self.ub - self.lb) * np.random.rand(self.dim) + self.lb + x = np.array([random.uniform(self.lb[i], self.ub[i]) for i in range(self.dim)]) self._x = x y = self.evaluate(x) diff --git a/bayes_optim/acquisition/optim/option.py b/bayes_optim/acquisition/optim/option.py index c0dbe67..1a2055a 100644 --- a/bayes_optim/acquisition/optim/option.py +++ b/bayes_optim/acquisition/optim/option.py @@ -6,8 +6,8 @@ "MIES": lambda dim: int(1e3 * dim), "BFGS": lambda dim: int(1e2 * dim), "OnePlusOne_Cholesky_CMA": lambda dim: int(1e3 * dim), - "DE": lambda dim: int(1e3 * dim) + "DE": lambda dim: int(1e3 * dim), } -default_AQ_n_restart: Callable = lambda dim: int(5 * dim) +default_AQ_n_restart: Callable = lambda dim: max(5, int(dim / 2)) default_AQ_wait_iter: int = 3 diff --git a/bayes_optim/base.py b/bayes_optim/base.py index eb1a36d..fef7a9f 100644 --- a/bayes_optim/base.py +++ b/bayes_optim/base.py @@ -1,21 +1,30 @@ import functools import os +import time from abc import abstractmethod -from copy import copy -from typing import Any, Callable, Dict, List, Optional, Tuple, Union from copy import copy, deepcopy +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import dill import numpy as np +from sklearn.base import clone, is_regressor from sklearn.metrics import mean_absolute_percentage_error, r2_score from bayes_optim.utils.logger import dump_logger, load_logger from ._base import BaseOptimizer from .acquisition import acquisition_fun as AcquisitionFunction -from .acquisition.optim import argmax_restart, default_AQ_max_FEs, default_AQ_n_restart, default_AQ_wait_iter +from .acquisition.optim import ( + OptimizationListener, + argmax_restart, + default_AQ_max_FEs, + default_AQ_n_restart, + default_AQ_wait_iter, +) +from .mylogging import * from .search_space import RealSpace from .solution import Solution +from .surrogate.gaussian_process import ConstantKernel, GaussianProcess, Matern from .utils import ( arg_to_int, dynamic_penalty, @@ -25,7 +34,6 @@ timeit, ) from .utils.exception import AskEmptyError, FlatFitnessError -from .mylogging import * __authors__ = ["Hao Wang"] @@ -118,7 +126,7 @@ def __init__( self._acquisition_par = acquisition_par if acquisition_par else {} # the callback functions executed after every call of `arg_max_acquisition` self._acquisition_callbacks = [] - self.model = model + self._model = model self._eval_type = eval_type self._set_internal_optimization(acquisition_optimization) @@ -126,8 +134,33 @@ def __init__( self._set_aux_vars() self.warm_data = warm_data - # global GLOBAL_CHARTS_SAVER1 - # GLOBAL_CHARTS_SAVER1 = MyChartSaver('BO', 'BO', self._search_space.bounds, self.obj_fun) + self.acq_opt_time = 0 + self.mode_fit_time = 0 + + self.my_seed = kwargs['random_seed'] + + if self._model is None: + self._model = self.create_default_model(self.search_space, self.my_seed) + self.model = clone(self._model) + self.__have_doe = False + + def create_default_model(self, my_search_space, random_seed): + lb, ub = np.array(my_search_space.bounds).T + cov_amplitude = ConstantKernel(1.0, (0.001, 100.0)) + dim = my_search_space.dim + other_kernel = Matern( + length_scale=np.ones(dim), length_scale_bounds=[(0.001, 100)] * dim, nu=2.5 + ) + model = GaussianProcess( + kernel=cov_amplitude * other_kernel, + normalize_y=True, + noise="gaussian", + n_restarts_optimizer=5, + lb=lb, + ub=ub, + ) + model.set_params(noise=0.1**2, random_state=random_seed) + return model @property def 
acquisition_fun(self): @@ -243,7 +276,7 @@ def _get_acq_function_search_space(self, fixed): def _get_acq_function_var_names(self): return self._search_space.var_name - def __set_argmax(self, fixed: Dict = None): + def __set_argmax(self, fixed: Dict = None, listener: OptimizationListener = None): """Set ``self._argmax_restart`` for optimizing the acquisition function""" fixed = {} if fixed is None else fixed self.acq_function_search_space = self._get_acq_function_search_space(fixed) @@ -256,6 +289,7 @@ def __set_argmax(self, fixed: Dict = None): n_restart=self.AQ_n_restart, wait_iter=self.AQ_wait_iter, optimizer=self._optimizer, + listener=listener, ) def _check_params(self): @@ -265,16 +299,28 @@ def _check_params(self): assert hasattr(AcquisitionFunction, self._acquisition_fun) + def __log_doe(self, X, Y): + if not self.__have_doe: + self.__have_doe = True + print(' DoE_value DoE_point') + for x, y in zip(X, Y): + print(' ', y, x) + sys.stdout.flush() + def step(self): self.logger.info(f"iteration {self.iter_count} starts...") X = self.ask() func_vals = self.evaluate(X) eprintf("candidate solution", X, "function values", func_vals) + self.__log_doe(X, func_vals) self.tell(X, func_vals) @timeit def ask( - self, n_point: int = None, fixed: Dict[str, Union[float, int, str]] = None + self, + n_point: int = None, + fixed: Dict[str, Union[float, int, str]] = None, + listener: OptimizationListener = None, ) -> Union[List[list], List[dict]]: """suggest a list of candidate solutions @@ -285,16 +331,19 @@ def ask( fixed : Dict[str, Union[float, int, str]], optional a dictionary specifies the decision variables fixed and the value to which those are fixed, by default None + listener : OptimizationListener listener of the events that happen + while the acquisition function is been optimized Returns ------- Union[List[list], List[dict]] the suggested candidates """ + start = time.time() if self.model is not None and self.model.is_fitted: n_point = self.n_point if n_point is None else n_point msg = f"asking {n_point} points:" - X = self.arg_max_acquisition(n_point=n_point, fixed=fixed) + X = self.arg_max_acquisition(n_point=n_point, fixed=fixed, listener=listener) X = self.pre_eval_check(X) # validate the new candidate solutions if len(X) < n_point: self.logger.warning( @@ -321,6 +370,7 @@ def ask( for i, _ in enumerate(X): self.logger.info(f"#{i + 1} - {self._to_pheno(X[i])}") + self.acq_opt_time = time.time() - start return self._to_pheno(X) @timeit @@ -376,8 +426,6 @@ def tell( self.iter_count += 1 self.hist_f.append(xopt.fitness) - # GLOBAL_CHARTS_SAVER1.save(self.iter_count, self.data) - def create_DoE(self, n_point: int, fixed: Dict = None) -> List: """get the initial sample points using Design of Experiemnt (DoE) methods @@ -399,9 +447,10 @@ def create_DoE(self, n_point: int, fixed: Dict = None) -> List: while n_point: # NOTE: random sampling could generate duplicated points again # keep sampling until getting enough points + search_space.random_seed = np.random.randint(1000) X = search_space.sample( n_point, - method="LHS" if n_point > 1 else "uniform", + method="uniform" if n_point > 1 else "uniform", h=partial_argument(self._h, self.search_space.var_name, fixed) if self._h else None, g=partial_argument(self._g, self.search_space.var_name, fixed) if self._g else None, ).tolist() @@ -442,6 +491,7 @@ def post_eval_check(self, X: Solution) -> Solution: def update_model(self): # TODO: implement a proper model selection here # TODO: in case r2 is really poor, re-fit the model or log-transform 
`fitness`? + start = time.time() data = self.data fitness = data.fitness @@ -453,20 +503,27 @@ def update_model(self): if len(fitness) > 5 and np.isclose(_std, 0): raise FlatFitnessError() - fitness_ = fitness if np.isclose(_std, 0) else (fitness - np.mean(fitness)) / np.std(fitness) + # fitness_ = fitness if np.isclose(_std, 0) else (fitness - np.mean(fitness)) / np.std(fitness) + fitness_ = fitness self.fmin, self.fmax = np.min(fitness_), np.max(fitness_) self.frange = self.fmax - self.fmin - self.model.fit(data, fitness_.reshape(-1, 1)) + self.model = clone(self._model) + self.model.fit(np.array(data.tolist()), fitness_) fitness_hat = self.model.predict(data) r2 = r2_score(fitness_, fitness_hat) MAPE = mean_absolute_percentage_error(fitness_, fitness_hat) self.logger.info(f"model r2: {r2}, MAPE: {MAPE}") + self.mode_fit_time = time.time() - start @timeit def arg_max_acquisition( - self, n_point: int = None, return_value: bool = False, fixed: Dict = None + self, + n_point: int = None, + return_value: bool = False, + fixed: Dict = None, + listener: OptimizationListener = None, ) -> List[list]: """Global Optimization of the acquisition function / Infill criterion @@ -480,15 +537,15 @@ def arg_max_acquisition( self.logger.debug("acquisition optimziation...") n_point = self.n_point if n_point is None else int(n_point) return_dx = self._optimizer == "BFGS" - self.__set_argmax(fixed) + self.__set_argmax(fixed, listener) if n_point > 1: # multi-point/batch sequential strategy candidates, values = self._batch_arg_max_acquisition( n_point=n_point, return_dx=return_dx, fixed=fixed ) else: # single-point strategy - criteria = self._create_acquisition(par={}, return_dx=return_dx, fixed=fixed) - candidates, values = self._argmax_restart(criteria, logger=self.logger) + criteria, criteria_ = self._create_acquisition(par={}, return_dx=return_dx, fixed=fixed) + candidates, values = self._argmax_restart(criteria, obj_func_=criteria_, logger=self.logger) candidates, values = [candidates], [values] candidates = [c for c in candidates if len(c) != 0] @@ -510,7 +567,7 @@ def _create_acquisition( var_name=self.search_space.var_name, fixed=fixed, reduce_output=return_dx, - ) + ), functools.partial(criterion, return_dx=False) def _batch_arg_max_acquisition(self, n_point: int, return_dx: int, fixed: Dict): raise NotImplementedError diff --git a/bayes_optim/extension.py b/bayes_optim/extension.py index fffbac4..64babee 100644 --- a/bayes_optim/extension.py +++ b/bayes_optim/extension.py @@ -11,8 +11,10 @@ from sklearn.decomposition import PCA from sklearn.metrics import mean_absolute_percentage_error, r2_score from abc import ABC, abstractmethod +from sklearn.base import clone, is_regressor from .acquisition import acquisition_fun as AcquisitionFunction +from .acquisition.optim import OptimizationListener from .bayes_opt import BO, ParallelBO from .search_space import RealSpace, SearchSpace from .solution import Solution @@ -23,6 +25,8 @@ from .mylogging import * import time +import bisect +import scipy GLOBAL_CHARTS_SAVER = None @@ -85,6 +89,7 @@ def __init__(self, dimensions: int, minimize: bool = True, kernel_fit_strategy: self.kernel_config = kernel_config self.N_same_kernel = 1 self.__count_same_kernel = self.N_same_kernel + self.__is_kernel_refit_needed = True @staticmethod def _check_input(X): @@ -105,7 +110,8 @@ def fit_transform(self, X: np.ndarray, y: np.ndarray) -> np.ndarray: def fit(self, X: np.ndarray, y: np.ndarray) -> KernelTransform: X_scaled = self._weighting_scheme(X, y) - 
self._fit_kernel_parameters(X_scaled) + if self.__is_kernel_refit_needed: + self.__fit_kernel_parameters(X_scaled, KernelParamsOptimizerSearch) return super().fit(X_scaled) def transform(self, X: np.ndarray) -> np.ndarray: @@ -116,6 +122,9 @@ def inverse_transform(self, Y: np.ndarray) -> np.ndarray: Y = self._check_input(Y) return super().inverse_transform(Y) + def set_kernel_refit_needed(self, is_needed): + self.__is_kernel_refit_needed = is_needed + def get_kernel_parameters(self): return self.kernel_config @@ -131,21 +140,18 @@ def _weighting_scheme(self, X, y): X_scaled = X_centered * w.reshape(-1, 1) return X_scaled - def _fit_kernel_parameters(self, X): - if self.__count_same_kernel < self.N_same_kernel: - self.__count_same_kernel += 1 - return - self.__count_same_kernel = 0 + def __fit_kernel_parameters(self, X, SearchStrategy): + self.__is_kernel_refit_needed = False if self.kernel_fit_strategy is KernelFitStrategy.FIXED_KERNEL: pass elif self.kernel_fit_strategy is KernelFitStrategy.AUTO: - kernel_name, kernel_parameters, result = KernelParamsGridSearch(self.X_initial_space, X, self.epsilon).find_best_kernel(['rbf']) + kernel_name, kernel_parameters, result = SearchStrategy(self.X_initial_space, X, self.epsilon).find_best_kernel(['rbf']) self.kernel_config = {'kernel_name': kernel_name, 'kernel_parameters': kernel_parameters} elif self.kernel_fit_strategy is KernelFitStrategy.LIST_OF_KERNEL: - kernel_name, kernel_parameters, result = KernelParamsGridSearch(self.X_initial_space, X, self.epsilon).find_best_kernel(self.kernel_config['kernel_names']) + kernel_name, kernel_parameters, result = SearchStrategy(self.X_initial_space, X, self.epsilon).find_best_kernel(self.kernel_config['kernel_names']) self.kernel_config = {'kernel_name': kernel_name, 'kernel_parameters': kernel_parameters} else: - raise ValueError(f'Kernel fit strategy str(KernelFitStrategy) is not known') + raise ValueError(f'Kernel fit strategy {str(KernelFitStrategy)} is not known') class KernelParamsSearchStrategy(ABC): @@ -174,20 +180,33 @@ def find_best_for_kernel(self, kernel_name: str): return kernel_name, params, result @staticmethod - def _try_kernel(parameters, epsilon, X_initial_space, X_weighted, kernel_name): + def __try_kernel(parameters, epsilon, X_initial_space, X_weighted, kernel_name): + kpca = MyKernelPCA(epsilon, X_initial_space, {'kernel_name': kernel_name, 'kernel_parameters': parameters}) + kpca.fit(X_weighted) + if kpca.too_compressed: + return int(1e9), 0 + return kpca.k, kpca.extracted_information + + @staticmethod + def __try_rbf_kernel(parameters, epsilon, X_initial_space, X_weighted, kernel_name, l2_norm_matrix): kpca = MyKernelPCA(epsilon, X_initial_space, {'kernel_name': kernel_name, 'kernel_parameters': parameters}) + kpca._set_reuse_rbf_data(l2_norm_matrix) kpca.fit(X_weighted) if kpca.too_compressed: return int(1e9), 0 return kpca.k, kpca.extracted_information @abstractmethod - def find_best_for_rbf(self): + def _optimize_rbf_gamma(f): pass - @abstractmethod + def find_best_for_rbf(self): + self.__l2_norm_matrix = [[np.sum((np.array(a) - np.array(b)) ** 2) for a in self._X_weighted] for b in self._X_weighted] + f = functools.partial(KernelParamsSearchStrategy.__try_rbf_kernel, epsilon=self._epsilon, X_initial_space=self._X, X_weighted=self._X_weighted, kernel_name='rbf', l2_norm_matrix=self.__l2_norm_matrix) + return self._optimize_rbf_gamma(f) + def find_best_for_poly(self): - pass + raise NotImplementedError class KernelParamsGridSearch(KernelParamsSearchStrategy): @@ -231,22 +250,61 
@@ def __gamma_sequential_grid_minimizer(f, start, end, steps): eprintf(f"Min dimensionality is {mi}, with max extracted information {max_second_value} and parameters {optimal_parameter}") return {'gamma': optimal_parameter}, mi + def _optimize_rbf_gamma(self, f): + return KernelParamsGridSearch.__gamma_sequential_grid_minimizer(f, 1e-4, 1, 100) + + +class KernelParamsOptimizerSearch(KernelParamsSearchStrategy): @staticmethod - def __try_kernel(parameters, epsilon, X_initial_space, X_weighted, kernel_name, l2_norm_matrix): - kpca = MyKernelPCA(epsilon, X_initial_space, {'kernel_name': kernel_name, 'kernel_parameters': parameters}) - kpca._set_reuse_rbf_data(l2_norm_matrix) - kpca.fit(X_weighted) - if kpca.too_compressed: - return int(1e9), 0 - return kpca.k, kpca.extracted_information + def __f_wrapper(f, x): + a, b = f({'gamma': x[0]}) + return float(a - b) - def find_best_for_rbf(self): - self.__l2_norm_matrix = [[np.sum((np.array(a) - np.array(b)) ** 2) for a in self._X_weighted] for b in self._X_weighted] - f = functools.partial(KernelParamsGridSearch.__try_kernel, epsilon=self._epsilon, X_initial_space=self._X, X_weighted=self._X_weighted, kernel_name='rbf', l2_norm_matrix=self.__l2_norm_matrix) - return KernelParamsGridSearch.__gamma_sequential_grid_minimizer(f, 1e-4, 1., 100) + def _get_componenets_and_info(self, fvalue): + min_components = int(math.floor(fvalue)) + 1 + info = min_components - fvalue + return min_components, info - def find_best_for_poly(self): - raise NotImplementedError + def __opt(self, f): + res = scipy.optimize.minimize(f, method='L-BFGS-B', x0=[0.05], bounds=[(1e-4, 2.)], options={'maxiter':200}) + gamma = res.x[0] + fopt, info = self._get_componenets_and_info(res.fun) + eprintf(f"Min dimensionality is {fopt}, with extracted information {info} and parameters gamma={gamma}") + return {'gamma': gamma}, fopt + + def _optimize_rbf_gamma(self, f): + return self.__opt(functools.partial(KernelParamsOptimizerSearch.__f_wrapper, f)) + + +class KernelParamsCombinedSearch(KernelParamsOptimizerSearch): + @staticmethod + def __f_wrapper(f, x): + a, b = f({'gamma': x[0]}) + return float(a - b) + + def __opt(self, f, initial, lb, ub): + res = scipy.optimize.minimize(f, method='L-BFGS-B', x0=[initial], bounds=[(lb, ub)], options={'maxiter':100}) + gamma = res.x[0] + fopt, info = self._get_componenets_and_info(res.fun) + eprintf(f"Min dimensionality is {fopt}, with extracted information {info} and parameters gamma={gamma}") + return {'gamma': gamma}, fopt + + def __grid_search(self, f, begin, step_size, evals): + mi = float('inf') + arg_mi = 0. 
+ for i in range(evals): + gamma = begin + i * step_size + cur = f([gamma]) + if cur < mi: + mi = cur + arg_mi = gamma + return arg_mi + + def _optimize_rbf_gamma(self, f): + f = functools.partial(KernelParamsCombinedSearch.__f_wrapper, f) + begin, step_size, evals = 0.01, 0.009, 100 + x0 = self.__grid_search(f, begin, step_size, evals) + return self.__opt(f, x0, x0 - step_size, x0 + step_size) def penalized_acquisition(x, acquisition_func, bounds, pca, return_dx): @@ -304,6 +362,8 @@ def __init__(self, n_components: Union[float, int] = None, **kwargs): self._pca = LinearTransform(n_components=n_components, svd_solver="full", minimize=self.minimize) global GLOBAL_CHARTS_SAVER GLOBAL_CHARTS_SAVER = MyChartSaver('PCABO-1', 'PCABO', self._search_space.bounds, self.obj_fun) + self.acq_opt_time = 0 + self.mode_fit_time = 0 @staticmethod def _compute_bounds(pca: PCA, search_space: SearchSpace) -> List[float]: @@ -329,11 +389,17 @@ def _create_acquisition(self, fun=None, par=None, return_dx=False, **kwargs) -> # wrap the penalized acquisition function for handling the box constraints return functools.partial( penalized_acquisition, - acquisition_func=acquisition_func, + acquisition_func=acquisition_func[0], bounds=self.__search_space.bounds, # hyperbox in the original space pca=self._pca, return_dx=return_dx, - ) + ), functools.partial( + penalized_acquisition, + acquisition_func=acquisition_func[1], + bounds=self.__search_space.bounds, # hyperbox in the original space + pca=self._pca, + return_dx=False, + ) def pre_eval_check(self, X: List) -> List: """Note that we do not check against duplicated point in PCA-BO since those points are @@ -352,7 +418,9 @@ def xopt(self): return self._xopt def ask(self, n_point: int = None) -> List[List[float]]: + start = time.time() new_x1 = self._pca.inverse_transform(super().ask(n_point)) + self.acq_opt_time = time.time() - start new_x = new_x1[0] is_inside = sum(1 for i in range(len(new_x)) if self.__search_space.bounds[i][0]<= new_x[i] <= self.__search_space.bounds[i][1])==len(new_x) eprintf("Is inside?", is_inside) @@ -391,27 +459,15 @@ def update_model(self, X: np.ndarray, y: np.ndarray): self._search_space = RealSpace(bounds) bounds = np.asarray(bounds) dim = self._search_space.dim - self.model = GaussianProcess( - mean=trend.constant_trend(dim), - corr="matern", - thetaL=1e-3 * (bounds[:, 1] - bounds[:, 0]), - thetaU=1e3 * (bounds[:, 1] - bounds[:, 0]), - nugget=1e-6, - noise_estim=False, - optimizer="BFGS", - wait_iter=3, - random_start=max(10, dim), - likelihood="concentrated", - eval_budget=100 * dim, - ) - + self.model = self.create_default_model(self.search_space, self.my_seed) _std = np.std(y) - y_ = y if np.isclose(_std, 0) else (y - np.mean(y)) / _std - + y_ = y self.fmin, self.fmax = np.min(y_), np.max(y_) self.frange = self.fmax - self.fmin + start = time.time() self.model.fit(X, y_) + self.mode_fit_time = time.time() - start GLOBAL_CHARTS_SAVER.save_model(self.model, X, y_) y_hat = self.model.predict(X) @@ -436,6 +492,8 @@ def __init__(self, max_information_loss=0.2, kernel_fit_strategy: KernelFitStrat assert isinstance(self._search_space, RealSpace) self.__search_space = deepcopy(self._search_space) # the original search space self._pca = KernelTransform(dimensions=self.search_space.dim, minimize=self.minimize, X_initial_space=[], epsilon=max_information_loss, kernel_fit_strategy=kernel_fit_strategy, kernel_config=kernel_config, NN=NN) + self.acq_opt_time = 0 + self.mode_fit_time = 0 @staticmethod def my_acquisition_function(x, 
lower_dimensional_acquisition_function, kpca): @@ -684,6 +742,11 @@ def __init__(self, max_information_loss=0.2, kernel_fit_strategy: KernelFitStrat self.__search_space = deepcopy(self._search_space) # the original search space self._pca = KernelTransform(dimensions=self.search_space.dim, minimize=self.minimize, X_initial_space=[], epsilon=max_information_loss, kernel_fit_strategy=kernel_fit_strategy, kernel_config=kernel_config, NN=NN) self._pca.enable_inverse_transform(self.__search_space.bounds) + self.out_solutions = 0 + self.ordered_container = KernelPCABO.MyOrderedContainer(self.minimize) + self.ratio_of_best_for_kernel_refit = 0.2 + self.acq_opt_time = 0 + self.mode_fit_time = 0 @staticmethod def _compute_bounds(kpca: MyKernelPCA, search_space: SearchSpace) -> List[float]: @@ -741,9 +804,42 @@ def xopt(self): self._xopt = self.data[np.where(self.data.fitness == fopt)[0][0]] return self._xopt + class MyAcqOptimizationListener(OptimizationListener): + def __init__(self): + self.fopts = [] + self.xopts = [] + + def on_optimum_found(self, fopt, xopt): + self.fopts.append(fopt) + self.xopts.append(xopt) + def ask(self, n_point: int = None) -> List[List[float]]: eprintf("Beginning of acq optimization") - return self._pca.inverse_transform(super().ask(n_point)) + listener = KernelPCABO.MyAcqOptimizationListener() + start = time.time() + X = super().ask(n_point, listener=listener) + self.acq_opt_time = time.time() - start + if len(X) > 1: + return X + inds = np.argsort(listener.fopts)[::-1] + first_point = None + bounds = self.__search_space.bounds + for point_number, ind in enumerate(inds): + new_point = self._pca.inverse_transform(listener.xopts[ind])[0] + if point_number == 0: + first_point = new_point + is_out = False + for i in range(len(bounds)): + if new_point[i] < bounds[i][0]: + new_point[i] = bounds[i][0] + is_out = True + if new_point[i] > bounds[i][1]: + new_point[i] = bounds[i][1] + is_out = True + if not is_out: + return [new_point] + self.out_solutions += 1 + return [first_point] def _run_experiment(self, bounds): eprintf('==================== Experiment =========================') @@ -764,6 +860,36 @@ def get_lower_space_dimensionality(self): def get_extracted_information(self): return self._pca.extracted_information + class MyOrderedContainer: + def __init__(self, minimize=True): + self.data = [] + self.minimize = minimize + + def __len__(self): + return len(self.data) + + def add_element(self, element): + if self.minimize: + bisect.insort_right(self.data, element) + else: + bisect.insort_left(self.data, element) + + def kth(self, k): + if not self.minimize: + return self.data(len(self.data) - k) + return self.data[k] + + def find_pos(self, element): + if not self.minimize: + return bisect.bisect_left(self.data, element) + return bisect.bisect_right(self.data, element) + + + def __is_promising_y(self, y): + pos = self.ordered_container.find_pos(y) + n = len(self.ordered_container) + return pos / n <= self.ratio_of_best_for_kernel_refit if n != 0 else True + def tell(self, new_X, new_y): self.logger.info(f"observing {len(new_X)} points:") for i, x in enumerate(new_X): @@ -780,6 +906,10 @@ def tell(self, new_X, new_y): new_X = self.post_eval_check(new_X) # remove NaN's self.data = self.data + new_X if hasattr(self, "data") else new_X + for y in new_y: + if self.__is_promising_y(y): + self._pca.set_kernel_refit_needed(True) + self.ordered_container.add_element(y) # re-fit the PCA transformation self._pca.set_initial_space_points(self.data) X = 
self._pca.fit_transform(np.array(self.data), self.data.fitness) @@ -799,27 +929,18 @@ def update_model(self, X: np.ndarray, y: np.ndarray): # is dynamic) dim = self._search_space.dim bounds = np.asarray(self._search_space.bounds) - self.model = GaussianProcess( - mean=trend.constant_trend(dim), - corr="matern", - thetaL=1e-3 * (bounds[:, 1] - bounds[:, 0]), - thetaU=1e3 * (bounds[:, 1] - bounds[:, 0]), - nugget=1e-6, - noise_estim=False, - optimizer="BFGS", - wait_iter=3, - random_start=max(10, dim), - likelihood="concentrated", - eval_budget=100 * dim, - ) + self._model = self.create_default_model(self._search_space, self.my_seed) + self.model = clone(self._model) _std = np.std(y) - y_ = y if np.isclose(_std, 0) else (y - np.mean(y)) / _std + y_ = y self.fmin, self.fmax = np.min(y_), np.max(y_) self.frange = self.fmax - self.fmin + start = time.time() self.model.fit(X, y_) + self.mode_fit_time = time.time() - start GLOBAL_CHARTS_SAVER.save_model(self.model, X, y_) y_hat = self.model.predict(X) diff --git a/bayes_optim/kpca.py b/bayes_optim/kpca.py index d235e14..df9a716 100644 --- a/bayes_optim/kpca.py +++ b/bayes_optim/kpca.py @@ -72,20 +72,19 @@ def enable_inverse_transform(self, bounds): def set_initial_space_points(self, X): self.X_initial_space = X - def __center_G(self, G): + def __center_G(self, G: np.ndarray) -> np.ndarray: ns = len(G) - line = [0.] * len(G) - for i in range(len(G)): - line[i] = sum(G[i]) - all_sum = sum(line) - return [[G[i][j] - line[i]/ns - line[j]/ns + all_sum/ns**2 for j in range(len(G[i]))] for i in range(len(G))] + O = np.ones((ns, ns)) + M = G @ O + return G - M / ns - M.T / ns + M.T.dot(O) / ns**2 @staticmethod - def __center_gram_line(g): - delta = sum(g) / len(g) - for i in range(len(g)): - g[i] -= delta - return g + def __center_g(g: np.ndarray, G: np.ndarray) -> np.ndarray: + n = len(g) + g = g.reshape(-1, 1) + t = G @ np.ones((n, 1)) + g = g - t / n - np.ones((n, n)) @ g / n + np.tile(np.sum(t) / n**2, (n, 1)) + return [x[0] for x in g] def __sorted_eig(self, X): values, vectors = np.linalg.eig(X) @@ -114,9 +113,9 @@ def l2(x): return ans @staticmethod - def f(X, good_subspace, k, V, z_star, bounds, w): + def f(X, good_subspace, k, V, z_star, bounds, G, w): x_ = MyKernelPCA.linear_combination(w, good_subspace) - g_star = MyKernelPCA.__center_gram_line([k(X[i], x_) for i in range(len(X))]) + g_star = MyKernelPCA.__center_g(np.array([k(X[i], x_) for i in range(len(X))]), G) bounds_ = np.atleast_2d(bounds) idx_lower = np.where(x_ < bounds_[:, 0])[0] idx_upper = np.where(x_ > bounds_[:, 1])[0] @@ -163,15 +162,12 @@ def fit(self, X_weighted: np.ndarray): self.kernel = create_kernel(self.kernel_config['kernel_name'], self.kernel_config['kernel_parameters']) self.X_weighted = X_weighted G = self.__compute_G(X_weighted) - G_centered = self.__center_G(G) - self.too_compressed = self.__is_too_compressed(G_centered) - eignValues, eignVectors = self.__sorted_eig(G_centered) - eignValues[eignValues < 0] = 0 - for e in eignValues: - if e < 0: - breakpoint() + self.G_centered = self.__center_G(G) + self.too_compressed = self.__is_too_compressed(self.G_centered) + eignValues, eignVectors = self.__sorted_eig(self.G_centered) eignValues = eignValues.view(np.float64) eignVectors = eignVectors.view(np.float64) + eignValues[eignValues < 0] = 0 eignValuesSum = sum(t for t in eignValues) s = 0 self.k = 0 @@ -186,7 +182,7 @@ def transform(self, X: np.ndarray): X_gram_lines = [] for x in X: g = self.__get_gram_line(self.X_initial_space, x) - g = self.__center_gram_line(g) + 
g = self.__center_g(g, self.G_centered) X_gram_lines.append(g) M = np.transpose(X_gram_lines) return np.transpose((self.V @ M)[:self.k]) @@ -209,7 +205,7 @@ def inverse_transform(self, Y: np.ndarray): raise ValueError(f"dimensionality of point is supposed to be {self.k}, but it is {len(y)}, the point {y}") good_subspace = self.get_good_subspace(y) # good_subspace, V1 = self.X_initial_space, self.V - partial_f = partial(MyKernelPCA.f, self.X_initial_space, good_subspace, self.kernel, self.V, y, self.bounds) + partial_f = partial(MyKernelPCA.f, self.X_initial_space, good_subspace, self.kernel, self.V, y, self.bounds, self.G_centered) initial_weights = np.zeros(len(good_subspace)) w0, fopt, *rest = optimize.fmin_bfgs(partial_f, initial_weights, full_output=True, disp=False) inversed = MyKernelPCA.linear_combination(w0, good_subspace) diff --git a/bayes_optim/mylogging.py b/bayes_optim/mylogging.py index a60cefa..9714982 100644 --- a/bayes_optim/mylogging.py +++ b/bayes_optim/mylogging.py @@ -1,6 +1,6 @@ import sys -import matplotlib as mpl -import matplotlib.pyplot as plt +# import matplotlib as mpl +# import matplotlib.pyplot as plt import os import math import statistics @@ -18,7 +18,7 @@ def save(self, fig, name): fig.savefig(self.path + name + '.' + self.extension) -MODE = Enum('EXECUTION MODE', 'DEBUG RELEASE') +MODE = Enum('EXECUTION MODE', 'DEBUG RELEASE DEBUG_CHARTS') MY_EXECUTION_MODE = MODE.RELEASE @@ -35,7 +35,7 @@ def fprintf(*args, **kwargs): class MyChartSaver: def __init__(self, folder_name, name, bounds, obj_function): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return directory = './'+folder_name+'/' self.saver = PictureSaver(directory, "-"+name, "png") @@ -107,12 +107,12 @@ def __get_sorted_var_columns_pairs(X): return var_col def set_iter_number(self, iter_number): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return self.iter_number = iter_number def create_figure_with_domain(self): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return fig = plt.figure() plt.xlim(list(self.bounds[0])) @@ -121,27 +121,27 @@ def create_figure_with_domain(self): return fig def add_evaluated_points(self, iter_number, X): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return plt.title(f'Iteration number {iter_number}, last point is ({X[-1][0]:.4f}, {X[-1][1]:.4f})') plt.scatter(X[:-1, 0], X[:-1, 1], c='black', marker='X') plt.scatter(X[-1][0], X[-1][1], c='red', marker='X') def add_mainfold(self, X_transformed, inverser): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return X = inverser.inverse_transform(X_transformed) plt.scatter(X[:, 0], X[:, 1], c='green') def save(self, iter_number, X): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return fig = self.create_figure_with_domain() self.add_evaluated_points(iter_number, X) self.saver.save(fig, f"DoE-{iter_number}") def save_with_manifold(self, iter_number, X, X_transformed, lb_f, ub_f, inverser): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return if len(X_transformed[0]) > 1: return @@ -159,7 +159,7 @@ def save_with_manifold(self, iter_number, X, X_transformed, lb_f, ub_f, inverser self.saver.save(fig, f"DoE-{iter_number}") def save_feature_space(self, X, y): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not 
MODE.DEBUG_CHARTS: return fig = plt.figure() colors = MyChartSaver.__compute_colours_2(y) @@ -168,7 +168,7 @@ def save_feature_space(self, X, y): self.saver.save(fig, f"Feature-Space-{self.iter_number}") def save_variances(self, X): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return fig = plt.figure() var = self.__get_sorted_var_columns_pairs(X) @@ -181,7 +181,7 @@ def save_variances(self, X): self.saver.save(fig, f'Variance-{self.iter_number}') def save_model(self, model, X, y_): - if MY_EXECUTION_MODE is not MODE.DEBUG: + if MY_EXECUTION_MODE is not MODE.DEBUG_CHARTS: return if len(X[0]) > 1: fig = plt.figure() @@ -194,7 +194,7 @@ def save_model(self, model, X, y_): X_ = np.concatenate((X_, np.array([x[0] for x in X])), axis=0) X_.sort() Y_predicted = model.predict(np.array([[x] for x in X_])) - Y_predicted = [y[0] for y in Y_predicted] + # Y_predicted = [y[0] for y in Y_predicted] plt.plot(X_, Y_predicted) plt.title(f'Model function after iteration {self.iter_number}') plt.scatter(X, y_, c='red') diff --git a/bayes_optim/search_space/samplers.py b/bayes_optim/search_space/samplers.py index f56f95e..8d28c7b 100644 --- a/bayes_optim/search_space/samplers.py +++ b/bayes_optim/search_space/samplers.py @@ -10,7 +10,7 @@ __author__ = "Hao Wang" -warnings.filterwarnings("error") +# warnings.filterwarnings("error") class SCMC: diff --git a/bayes_optim/search_space/search_space.py b/bayes_optim/search_space/search_space.py index a01926f..3180c87 100644 --- a/bayes_optim/search_space/search_space.py +++ b/bayes_optim/search_space/search_space.py @@ -189,9 +189,7 @@ def _set_structure(self, structure: Union[dict, List[Node]] = None): key = var.conditions["vars"][0] if key not in var.conditions["vars"]: raise ValueError(f"variable {var} not in {self}") - _structure.setdefault(key, []).append( - {"name": var.name, "condition": var.conditions["string"]} - ) + _structure.setdefault(key, []).append({"name": var.name, "condition": var.conditions["string"]}) self.structure = [t.remove(self.var_name, invert=True) for t in Node.from_dict(_structure)] self.structure = [t for t in self.structure if t] @@ -223,9 +221,7 @@ def _set_index(self): self.__dict__[attr_mask] = mask self.__dict__[attr_id] = np.nonzero(mask)[0] - self.categorical_id = np.r_[ - self.discrete_id, self.ordinal_id, self.bool_id, self.subset_id - ] + self.categorical_id = np.r_[self.discrete_id, self.ordinal_id, self.bool_id, self.subset_id] self.categorical_mask = np.bitwise_or( self.bool_mask, np.bitwise_or(self.discrete_mask, self.ordinal_mask, self.subset_mask), @@ -236,9 +232,7 @@ def _set_levels(self): """Set categorical levels for all nominal variables""" if self.dim > 0: self.levels = ( - {i: self._bounds[i] for i in self.categorical_id} - if len(self.categorical_id) > 0 - else {} + {i: self._bounds[i] for i in self.categorical_id} if len(self.categorical_id) > 0 else {} ) def __getitem__(self, index) -> Union[SearchSpace, Variable]: @@ -321,9 +315,7 @@ def __add__(self, space) -> SearchSpace: random_seed = self.random_seed if self.random_seed else space.random_seed data = deepcopy(self.data) + space.data if hasattr(self, "structure"): - structure = [t.deepcopy() for t in self.structure] + [ - t.deepcopy() for t in space.structure - ] + structure = [t.deepcopy() for t in self.structure] + [t.deepcopy() for t in space.structure] else: structure = {} return SearchSpace(data, random_seed, structure) @@ -397,9 +389,7 @@ def __imul__(self, N: int) -> SearchSpace: SearchSpace `self` """ - 
self._set_data( - self.data + [deepcopy(var) for _ in range(max(1, int(N - 1))) for var in self.data] - ) + self._set_data(self.data + [deepcopy(var) for _ in range(max(1, int(N - 1))) for var in self.data]) return self def __repr__(self): @@ -536,14 +526,12 @@ def sample( try: # NOTE: equality constraints are converted to an epsilon-tude around the # corresponding manifold - idx_h = ( - list(map(lambda x: all(np.isclose(np.abs(h(x)), 0, atol=tol)), S)) - if h - else [True] * n - ) - idx_g = list(map(lambda x: np.all(np.asarray(g(x)) <= 0), S)) if g else [True] * n - idx = np.bitwise_and(idx_h, idx_g) - S = S[idx, :] + if h: + idx = list(map(lambda x: all(np.isclose(np.abs(h(x)), 0, atol=tol)), S)) + S = S[idx, :] + if g: + idx = list(map(lambda x: np.all(np.asarray(g(x)) <= 0), S)) + S = S[idx, :] except Exception as e: raise ConstraintEvaluationError(S, str(e)) from None @@ -662,8 +650,7 @@ def from_dict(cls, param: dict) -> SearchSpace: ] elif v["type"] in ["i", "int", "integer"]: # integer-valued parameter _vars = [ - Integer(bounds, name=k, default_value=default_value, step=v.pop("step", 1)) - for _ in N + Integer(bounds, name=k, default_value=default_value, step=v.pop("step", 1)) for _ in N ] elif v["type"] in ["o", "ordinal"]: # ordinal parameter _vars = [Ordinal(bounds, name=k, default_value=default_value) for _ in N] diff --git a/bayes_optim/surrogate/gaussian_process/__init__.py b/bayes_optim/surrogate/gaussian_process/__init__.py index dbf9559..563a055 100644 --- a/bayes_optim/surrogate/gaussian_process/__init__.py +++ b/bayes_optim/surrogate/gaussian_process/__init__.py @@ -1,230 +1,390 @@ from __future__ import annotations import warnings -from functools import partial -from typing import List, Tuple, Union import numpy as np -from scipy.linalg import solve_triangular -from scipy.spatial.distance import cdist +import scipy +import sklearn +from scipy.linalg import cho_solve, solve_triangular from sklearn.gaussian_process import GaussianProcessRegressor as sk_gpr -from sklearn.gaussian_process.kernels import RBF, Matern, WhiteKernel +from sklearn.utils import check_array -from ...search_space import RealSpace, SearchSpace -from ...utils import safe_divide from . import trend -from .gpr import GaussianProcess +from .kernels import RBF, ConstantKernel, Matern, Sum, WhiteKernel from .utils import MSLL, SMSE -# from sklearn.model_selection import cross_val_score - - warnings.filterwarnings("ignore") __author__ = "Hao Wang" -__all__ = ["SMSE", "MSLL", "GaussianProcess", "trend"] - - -class GaussianProcessSklearn: - """Wrapper for sklearn's GPR model""" +__all__ = ["SMSE", "MSLL", "GaussianProcess", "trend", "RBF", "ConstantKernel", "Matern"] + + +def _param_for_white_kernel_in_Sum(kernel, kernel_str=""): + """ + Check if a WhiteKernel exists in a Sum Kernel + and if it does return the corresponding key in + `kernel.get_params()` + """ + if kernel_str != "": + kernel_str = kernel_str + "__" + + if isinstance(kernel, Sum): + for param, child in kernel.get_params(deep=False).items(): + if isinstance(child, WhiteKernel): + return True, kernel_str + param + else: + present, child_str = _param_for_white_kernel_in_Sum(child, kernel_str + param) + if present: + return True, child_str + + return False, "_" + + +class GaussianProcess(sk_gpr): + """ + GaussianProcessRegressor that allows noise tunability. + + The implementation is based on Algorithm 2.1 of Gaussian Processes + for Machine Learning (GPML) by Rasmussen and Williams. 
+ + In addition to standard scikit-learn estimator API, + GaussianProcessRegressor: + + * allows prediction without prior fitting (based on the GP prior); + * provides an additional method sample_y(X), which evaluates samples + drawn from the GPR (prior or posterior) at given inputs; + * exposes a method log_marginal_likelihood(theta), which can be used + externally for other ways of selecting hyperparameters, e.g., via + Markov chain Monte Carlo. + + Parameters + ---------- + kernel : kernel object + The kernel specifying the covariance function of the GP. If None is + passed, the kernel "1.0 * RBF(1.0)" is used as default. Note that + the kernel's hyperparameters are optimized during fitting. + + alpha : float or array-like, optional (default: 1e-10) + Value added to the diagonal of the kernel matrix during fitting. + Larger values correspond to increased noise level in the observations + and reduce potential numerical issue during fitting. If an array is + passed, it must have the same number of entries as the data used for + fitting and is used as datapoint-dependent noise level. Note that this + is equivalent to adding a WhiteKernel with c=alpha. Allowing to specify + the noise level directly as a parameter is mainly for convenience and + for consistency with Ridge. + + optimizer : string or callable, optional (default: "fmin_l_bfgs_b") + Can either be one of the internally supported optimizers for optimizing + the kernel's parameters, specified by a string, or an externally + defined optimizer passed as a callable. If a callable is passed, it + must have the signature:: + + def optimizer(obj_func, initial_theta, bounds): + # * 'obj_func' is the objective function to be maximized, which + # takes the hyperparameters theta as parameter and an + # optional flag eval_gradient, which determines if the + # gradient is returned additionally to the function value + # * 'initial_theta': the initial value for theta, which can be + # used by local optimizers + # * 'bounds': the bounds on the values of theta + .... + # Returned are the best found hyperparameters theta and + # the corresponding value of the target function. + return theta_opt, func_min + + Per default, the 'fmin_l_bfgs_b' algorithm from scipy.optimize + is used. If None is passed, the kernel's parameters are kept fixed. + Available internal optimizers are:: + + 'fmin_l_bfgs_b' + + n_restarts_optimizer : int, optional (default: 0) + The number of restarts of the optimizer for finding the kernel's + parameters which maximize the log-marginal likelihood. The first run + of the optimizer is performed from the kernel's initial parameters, + the remaining ones (if any) from thetas sampled log-uniform randomly + from the space of allowed theta-values. If greater than 0, all bounds + must be finite. Note that n_restarts_optimizer == 0 implies that one + run is performed. + + normalize_y : boolean, optional (default: False) + Whether the target values y are normalized, i.e., the mean of the + observed target values become zero. This parameter should be set to + True if the target values' mean is expected to differ considerable from + zero. When enabled, the normalization effectively modifies the GP's + prior based on the data, which contradicts the likelihood principle; + normalization is thus disabled per default. + + copy_X_train : bool, optional (default: True) + If True, a persistent copy of the training data is stored in the + object. 
Otherwise, just a reference to the training data is stored, + which might cause predictions to change if the data is modified + externally. + + random_state : integer or numpy.RandomState, optional + The generator used to initialize the centers. If an integer is + given, it fixes the seed. Defaults to the global numpy random + number generator. + + noise : string, "gaussian", optional + If set to "gaussian", then it is assumed that `y` is a noisy + estimate of `f(x)` where the noise is gaussian. + + Attributes + ---------- + X_train_ : array-like, shape = (n_samples, n_features) + Feature values in training data (also required for prediction) + + y_train_ : array-like, shape = (n_samples, [n_output_dims]) + Target values in training data (also required for prediction) + + kernel_ kernel object + The kernel used for prediction. The structure of the kernel is the + same as the one passed as parameter but with optimized hyperparameters + + L_ : array-like, shape = (n_samples, n_samples) + Lower-triangular Cholesky decomposition of the kernel in ``X_train_`` + + alpha_ : array-like, shape = (n_samples,) + Dual coefficients of training data points in kernel space + + log_marginal_likelihood_value_ : float + The log-marginal-likelihood of ``self.kernel_.theta`` + + noise_ : float + Estimate of the gaussian noise. Useful only when noise is set to + "gaussian". + """ def __init__( self, - domain: SearchSpace, - codomain: SearchSpace = None, - n_obj: int = 1, - optimizer: str = "fmin_l_bfgs_b", - n_restarts_optimizer: int = 0, - normalize_y: bool = True, - length_scale_bounds: Union[Tuple[float, float], List[Tuple[float, float]]] = (1e-10, 1e10), + kernel=None, + alpha=1e-10, + optimizer="fmin_l_bfgs_b", + n_restarts_optimizer=0, + normalize_y=False, + copy_X_train=True, + random_state=None, + noise=None, + lb=None, + ub=None, ): - assert isinstance(domain, RealSpace) - assert codomain is None or isinstance(codomain, RealSpace) - self.domain = domain - # TODO: this is not used for now, which should implement restriction on the co-domain - self.codomain = codomain - self.dim = self.domain.dim - self.n_obj = n_obj + self.noise = noise self.is_fitted = False - - self._set_length_scale_bounds(length_scale_bounds, np.atleast_2d(self.domain.bounds)) - self._set_kernels() - - if n_restarts_optimizer == 0: - n_restarts_optimizer = int(3 * self.dim) - - self._gpr_cls = partial( - sk_gpr, - normalize_y=normalize_y, - alpha=1e-8, + self.lb = lb + self.ub = ub + super().__init__( + kernel=kernel, + alpha=alpha, optimizer=optimizer, n_restarts_optimizer=n_restarts_optimizer, + normalize_y=normalize_y, + copy_X_train=copy_X_train, + random_state=random_state, ) - def _set_length_scale_bounds(self, length_scale_bounds, bounds) -> np.ndarray: - length_scale_bounds = np.atleast_2d(length_scale_bounds) - if len(length_scale_bounds) == 1: - length_scale_bounds = np.repeat(length_scale_bounds, self.dim, axis=0) - self.length_scale_bounds = length_scale_bounds * (bounds[:, [1]] - bounds[:, [0]]) - - def _set_kernels(self): - kernel = [ - Matern( - length_scale=np.ones(self.dim), - length_scale_bounds=self.length_scale_bounds, - nu=1.5, - ), - Matern( - length_scale=np.ones(self.dim), - length_scale_bounds=self.length_scale_bounds, - nu=2.5, - ), - RBF( - length_scale=np.ones(self.dim), - length_scale_bounds=self.length_scale_bounds, - ), - ] - self._kernel = [ - 1.0 * k + WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-15, 1e15)) for k in kernel - ] - - def _check_dims(self, X: np.ndarray, y: np.ndarray): - 
"""check if the input/output dimensions are consistent with dimensions of the domain/co-domain""" - if self.dim != X.shape[1]: - raise ValueError( - f"X has {X.shape[1]} variables, which does not match the dimension of the domain {self.dim}" - ) - if len(y.shape) == 1: - if self.n_obj != 1: - raise ValueError( - f"y has one variable, which does not match " - f"the dimension of the co-domain {self.n_obj}" - ) - else: - if self.n_obj != y.shape[1]: - raise ValueError( - f"y has {y.shape[1]} variables, which does not match " - f"the dimension of the co-domain {self.n_obj}" - ) - - def fit(self, X: np.ndarray, y: np.ndarray) -> GaussianProcess: + def fit(self, X, y): """Fit Gaussian process regression model. Parameters ---------- - X : array-like of shape (n_samples, n_features) or list of object - Feature vectors or other representations of training data. - y : array-like of shape (n_samples,) or (n_samples, n_targets) - Target values. + X : array-like, shape = (n_samples, n_features) + Training data + + y : array-like, shape = (n_samples, [n_output_dims]) + Target values Returns ------- - self : object - `GaussianProcess` class instance. + self + Returns an instance of self. """ - X = np.array(X.tolist()) - self._check_dims(X, y) - self._gprs = list() - # cv = min(len(X), 3) - - for i in range(self.n_obj): - # TODO: to verify this cross-validation procedure - # score = -np.inf - # kernel = None - # for k in self._kernel: - # scores = cross_val_score(self._gpr_cls(kernel=k), X, y[:, i], cv=cv, scoring="r2", n_jobs=cv) - # score_ = np.nanmean(scores) - # if score_ > score: - # score = score_ - # kernel = k - - gpr = self._gpr_cls(kernel=self._kernel[0]).fit(X, y if y.ndim == 1 else y[:, i]) - setattr(gpr, "_K_inv", None) - self._gprs.append(gpr) + if isinstance(self.noise, str) and self.noise != "gaussian": + raise ValueError("expected noise to be 'gaussian', got %s" % self.noise) - self.is_fitted = True - return self - - def predict(self, X: np.ndarray, eval_MSE: bool = False) -> np.ndarray: - n_samples = X.shape[0] - y_hat = np.zeros((n_samples, self.n_obj)) - if eval_MSE: - std_hat = np.zeros((n_samples, self.n_obj)) + if self.kernel is None: + self.kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF( + 1.0, length_scale_bounds="fixed" + ) + if self.noise == "gaussian": + self.kernel = self.kernel + WhiteKernel() + elif self.noise: + self.kernel = self.kernel + WhiteKernel(noise_level=self.noise, noise_level_bounds="fixed") + + X = np.atleast_2d(X.tolist()) + if self.lb is not None and self.ub is not None: + X = (X - self.lb) / (self.ub - self.lb) + + super().fit(X, y.ravel()) + self.noise_ = None + + if self.noise: + # The noise component of this kernel should be set to zero + # while estimating K(X_test, X_test) + # Note that the term K(X, X) should include the noise but + # this (K(X, X))^-1y is precomputed as the attribute `alpha_`. + # (Notice the underscore). + # This has been described in Eq 2.24 of + # http://www.gaussianprocess.org/gpml/chapters/RW2.pdf + # Hence this hack + if isinstance(self.kernel_, WhiteKernel): + self.kernel_.set_params(noise_level=0.0) - for i, gp in enumerate(self._gprs): - out = gp.predict(X, return_std=eval_MSE) - if eval_MSE: - y_hat[:, i], std_hat[:, i] = out[0], out[1] ** 2 else: - y_hat[:, i] = out + white_present, white_param = _param_for_white_kernel_in_Sum(self.kernel_) + + # This should always be true. Just in case. 
+ if white_present: + noise_kernel = self.kernel_.get_params()[white_param] + self.noise_ = noise_kernel.noise_level + self.kernel_.set_params(**{white_param: WhiteKernel(noise_level=0.0)}) + + # Precompute arrays needed at prediction + L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0])) + self.K_inv_ = L_inv.dot(L_inv.T) + + # Fix deprecation warning #462 + if sklearn.__version__ >= "0.23": + self.y_train_std_ = self._y_train_std + self.y_train_mean_ = self._y_train_mean + elif sklearn.__version__ >= "0.19": + self.y_train_mean_ = self._y_train_mean + self.y_train_std_ = 1 + else: + self.y_train_mean_ = self.y_train_mean + self.y_train_std_ = 1 - return (y_hat, std_hat) if eval_MSE else y_hat + self.is_fitted = True + return self - def gradient(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - """compute the gradient of GPR's mean and variance w.r.t. the input points + def predict(self, X, return_std=False, return_cov=False, return_mean_grad=False, return_std_grad=False): + """ + Predict output for X. + + In addition to the mean of the predictive distribution, also its + standard deviation (return_std=True) or covariance (return_cov=True), + the gradient of the mean and the standard-deviation with respect to X + can be optionally provided. Parameters ---------- - X : np.ndarray - the point at which gradients should be computed + X : array-like, shape = (n_samples, n_features) + Query points where the GP is evaluated. + + return_std : bool, default: False + If True, the standard-deviation of the predictive distribution at + the query points is returned along with the mean. + + return_cov : bool, default: False + If True, the covariance of the joint predictive distribution at + the query points is returned along with the mean. + + return_mean_grad : bool, default: False + Whether or not to return the gradient of the mean. + Only valid when X is a single point. + + return_std_grad : bool, default: False + Whether or not to return the gradient of the std. + Only valid when X is a single point. Returns ------- - Tuple[np.ndarray, np.ndarray] - (grad of mean, grad of variance) - """ - n_samples = X.shape[0] - mean_dx = np.zeros((self.n_obj, n_samples, self.dim)) - var_dx = np.zeros((self.n_obj, n_samples, self.dim)) + y_mean : array, shape = (n_samples, [n_output_dims]) + Mean of predictive distribution a query points - for i, gp in enumerate(self._gprs): - scale = getattr(gp, "_y_train_std") - k = np.expand_dims(gp.kernel_(X, gp.X_train_), 2) # shape (n_samples, N_train, 1) - k_dx = self.kernel_dx(gp, X) # shape (n_samples, dim, N_train) + y_std : array, shape = (n_samples,), optional + Standard deviation of predictive distribution at query points. + Only returned when return_std is True. - if getattr(gp, "_K_inv") is None: - L_inv = solve_triangular(gp.L_.T, np.eye(gp.L_.shape[0])) - setattr(gp, "_K_inv", L_inv.dot(L_inv.T)) + y_cov : array, shape = (n_samples, n_samples), optional + Covariance of joint predictive distribution a query points. + Only returned when return_cov is True. - mean_dx[i, ...] = scale * k_dx @ gp.alpha_ - var_dx[i, ...] = -2.0 * scale**2 * np.squeeze(k_dx @ getattr(gp, "_K_inv") @ k) - return mean_dx, var_dx + y_mean_grad : shape = (n_samples, n_features) + The gradient of the predicted mean - def kernel_dx(self, gp: sk_gpr, X: np.ndarray) -> np.ndarray: - """compute the gradient of the kernel function w.r.t. the input points + y_std_grad : shape = (n_samples, n_features) + The gradient of the predicted std. 
+ """ + if return_std and return_cov: + raise RuntimeError( + "Not returning standard deviation of predictions when " "returning full covariance." + ) - Parameters - ---------- - gp : sk_gpr - a fitted `GaussianProcessRegressor` instance - X : np.ndarray - the location at which the gradient will be computed + if return_std_grad and not return_std: + raise ValueError("Not returning std_gradient without returning " "the std.") + + X = np.atleast_2d(X.tolist()) + range_ = self.ub - self.lb + if self.lb is not None and self.ub is not None: + X = (X - self.lb) / range_ + + X = check_array(X) + if X.shape[0] != 1 and (return_mean_grad or return_std_grad): + raise ValueError("Not implemented for n_samples > 1") + + if not hasattr(self, "X_train_"): # Not fit; predict based on GP prior + y_mean = np.zeros(X.shape[0]) + if return_cov: + y_cov = self.kernel(X) + return y_mean, y_cov + elif return_std: + y_var = self.kernel.diag(X) + return y_mean, np.sqrt(y_var) + else: + return y_mean + + else: # Predict based on GP posterior + K_trans = self.kernel_(X, self.X_train_) + y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star) + # undo normalisation + y_mean = self.y_train_std_ * y_mean + self.y_train_mean_ + + if return_cov: + v = cho_solve((self.L_, True), K_trans.T) # Line 5 + y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6 + # undo normalisation + y_cov = y_cov * self.y_train_std_**2 + return y_mean, y_cov + + elif return_std: + K_inv = self.K_inv_ + + # Compute variance of predictive distribution + y_var = self.kernel_.diag(X) + y_var -= np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv) + + # Check if any of the variances is negative because of + # numerical issues. If yes: set the variance to 0. + y_var_negative = y_var < 0 + if np.any(y_var_negative): + warnings.warn("Predicted variances smaller than 0. 
" "Setting those variances to 0.") + y_var[y_var_negative] = 0.0 + # undo normalisation + y_var = y_var * self.y_train_std_**2 + y_std = np.sqrt(y_var) + + if return_mean_grad: + grad = self.kernel_.gradient_x(X[0], self.X_train_) + grad_mean = np.dot(grad.T, self.alpha_) + # undo normalisation + grad_mean = grad_mean * self.y_train_std_ + if return_std_grad: + grad_std = np.zeros(X.shape[1]) + if not np.allclose(y_std, grad_std): + grad_std = -np.dot(K_trans, np.dot(K_inv, grad))[0] / y_std + # undo normalisation + grad_std = grad_std * self.y_train_std_**2 + return y_mean, y_std, grad_mean * range_, grad_std * range_ + + if return_std: + return y_mean, y_std, grad_mean * range_ + else: + return y_mean, grad_mean * range_ - Returns - ------- - np.ndarray - the gradient of shape (n_samples, dim, n_train) - """ - sigma2 = np.exp(gp.kernel_.k1.k1.theta) - length_scale = np.exp(gp.kernel_.k1.k2.theta) - kernel = gp.kernel_.k1.k2 - - d = np.expand_dims( - cdist(X / length_scale, gp.X_train_ / length_scale), 1 - ) # shape (n_samples, 1, n_train) - X_ = np.expand_dims(X, 2) # shape (n_samples, dim, 1) - X_train_ = np.expand_dims(gp.X_train_.T, 0) # shape (1, dim, n_train) - # shape (n_samples, dim, n_train) - dd = safe_divide(X_ - X_train_, d * np.expand_dims(length_scale**2, 1)) - - if isinstance(kernel, Matern): - nu = kernel.nu - if nu == 0.5: - g = -sigma2 * np.exp(-d) * dd - elif nu == 1.5: - g = -3 * sigma2 * np.exp(-np.sqrt(3) * d) * d * dd - elif nu == 2.5: - g = -5.0 / 3 * sigma2 * np.exp(-np.sqrt(5) * d) * (1 + np.sqrt(5) * d) * d * dd - elif isinstance(kernel, RBF): - g = -sigma2 * np.exp(-0.5 * d**2) * dd - return g + else: + if return_std: + return y_mean, y_std + else: + return y_mean diff --git a/bayes_optim/surrogate/gaussian_process/gpr.py b/bayes_optim/surrogate/gaussian_process/gpr.py index 54563d5..16a78ec 100755 --- a/bayes_optim/surrogate/gaussian_process/gpr.py +++ b/bayes_optim/surrogate/gaussian_process/gpr.py @@ -11,12 +11,13 @@ from sklearn.utils import check_array, check_random_state, check_X_y from .cma_es import cma_es -from .kernel import absolute_exponential, cubic, generalized_exponential, matern, squared_exponential +from .kernel_old import (absolute_exponential, cubic, generalized_exponential, + matern, squared_exponential) from .trend import BasisExpansionTrend, NonparametricTrend, constant_trend MACHINE_EPSILON = np.finfo(np.double).eps -warnings.filterwarnings("error") +# warnings.filterwarnings("error") def l1_cross_distances(X): @@ -524,7 +525,9 @@ def predict(self, X, eval_MSE=False, batch_size=None): for k in range(max(1, n_eval // batch_size)): batch_from = k * batch_size batch_to = min([(k + 1) * batch_size + 1, n_eval + 1]) - y[batch_from:batch_to] = self.predict(X[batch_from:batch_to], eval_MSE=eval_MSE, batch_size=None)[:,0] + y[batch_from:batch_to] = self.predict( + X[batch_from:batch_to], eval_MSE=eval_MSE, batch_size=None + )[:, 0] return y def gradient(self, x): diff --git a/bayes_optim/surrogate/gaussian_process/kernel.py b/bayes_optim/surrogate/gaussian_process/kernel_old.py similarity index 100% rename from bayes_optim/surrogate/gaussian_process/kernel.py rename to bayes_optim/surrogate/gaussian_process/kernel_old.py diff --git a/bayes_optim/surrogate/gaussian_process/kernels.py b/bayes_optim/surrogate/gaussian_process/kernels.py new file mode 100644 index 0000000..a10acf7 --- /dev/null +++ b/bayes_optim/surrogate/gaussian_process/kernels.py @@ -0,0 +1,395 @@ +from math import sqrt + +import numpy as np +from sklearn.gaussian_process.kernels 
import RBF as sk_RBF +from sklearn.gaussian_process.kernels import ConstantKernel as sk_ConstantKernel +from sklearn.gaussian_process.kernels import DotProduct as sk_DotProduct +from sklearn.gaussian_process.kernels import Exponentiation as sk_Exponentiation +from sklearn.gaussian_process.kernels import ExpSineSquared as sk_ExpSineSquared +from sklearn.gaussian_process.kernels import Hyperparameter +from sklearn.gaussian_process.kernels import Kernel as sk_Kernel +from sklearn.gaussian_process.kernels import Matern as sk_Matern +from sklearn.gaussian_process.kernels import NormalizedKernelMixin as sk_NormalizedKernelMixin +from sklearn.gaussian_process.kernels import Product as sk_Product +from sklearn.gaussian_process.kernels import RationalQuadratic as sk_RationalQuadratic +from sklearn.gaussian_process.kernels import StationaryKernelMixin as sk_StationaryKernelMixin +from sklearn.gaussian_process.kernels import Sum as sk_Sum +from sklearn.gaussian_process.kernels import WhiteKernel as sk_WhiteKernel + + +class Kernel(sk_Kernel): + """ + Base class for skopt.gaussian_process kernels. + Supports computation of the gradient of the kernel with respect to X + """ + + def __add__(self, b): + if not isinstance(b, Kernel): + return Sum(self, ConstantKernel(b)) + return Sum(self, b) + + def __radd__(self, b): + if not isinstance(b, Kernel): + return Sum(ConstantKernel(b), self) + return Sum(b, self) + + def __mul__(self, b): + if not isinstance(b, Kernel): + return Product(self, ConstantKernel(b)) + return Product(self, b) + + def __rmul__(self, b): + if not isinstance(b, Kernel): + return Product(ConstantKernel(b), self) + return Product(b, self) + + def __pow__(self, b): + return Exponentiation(self, b) + + def gradient_x(self, x, X_train): + """ + Computes gradient of K(x, X_train) with respect to x + + Parameters + ---------- + x: array-like, shape=(n_features,) + A single test point. + + X_train: array-like, shape=(n_samples, n_features) + Training data used to fit the gaussian process. + + Returns + ------- + gradient_x: array-like, shape=(n_samples, n_features) + Gradient of K(x, X_train) with respect to x. 
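+
+        Notes
+        -----
+        Concrete kernels below override this with an analytic expression.
+        A quick sanity check for an override (a sketch, not part of the
+        API) is a central finite difference in each coordinate of ``x``:
+        ``(k(x + eps * e_i, X_train) - k(x - eps * e_i, X_train)) / (2 * eps)``.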
+ """ + raise NotImplementedError + + +class RBF(Kernel, sk_RBF): + def gradient_x(self, x, X_train): + # diff = (x - X) / length_scale + # size = (n_train_samples, n_dimensions) + x = np.asarray(x) + X_train = np.asarray(X_train) + + length_scale = np.asarray(self.length_scale) + diff = x - X_train + diff /= length_scale + + # e = -exp(0.5 * \sum_{i=1}^d (diff ** 2)) + # size = (n_train_samples, 1) + exp_diff_squared = np.sum(diff**2, axis=1) + exp_diff_squared *= -0.5 + exp_diff_squared = np.exp(exp_diff_squared, exp_diff_squared) + exp_diff_squared = np.expand_dims(exp_diff_squared, axis=1) + exp_diff_squared *= -1 + + # gradient = (e * diff) / length_scale + gradient = exp_diff_squared * diff + gradient /= length_scale + return gradient + + +class Matern(Kernel, sk_Matern): + def gradient_x(self, x, X_train): + x = np.asarray(x) + X_train = np.asarray(X_train) + length_scale = np.asarray(self.length_scale) + + # diff = (x - X_train) / length_scale + # size = (n_train_samples, n_dimensions) + diff = x - X_train + diff /= length_scale + + # dist_sq = \sum_{i=1}^d (diff ^ 2) + # dist = sqrt(dist_sq) + # size = (n_train_samples,) + dist_sq = np.sum(diff**2, axis=1) + dist = np.sqrt(dist_sq) + + if self.nu == 0.5: + # e = -np.exp(-dist) / dist + # size = (n_train_samples, 1) + scaled_exp_dist = -dist + scaled_exp_dist = np.exp(scaled_exp_dist, scaled_exp_dist) + scaled_exp_dist *= -1 + + # grad = (e * diff) / length_scale + # For all i in [0, D) if x_i equals y_i. + # 1. e -> -1 + # 2. (x_i - y_i) / \sum_{j=1}^D (x_i - y_i)**2 approaches 1. + # Hence the gradient when for all i in [0, D), + # x_i equals y_i is -1 / length_scale[i]. + gradient = -np.ones((X_train.shape[0], x.shape[0])) + mask = dist != 0.0 + scaled_exp_dist[mask] /= dist[mask] + scaled_exp_dist = np.expand_dims(scaled_exp_dist, axis=1) + gradient[mask] = scaled_exp_dist[mask] * diff[mask] + gradient /= length_scale + return gradient + + elif self.nu == 1.5: + # grad(fg) = f'g + fg' + # where f = 1 + sqrt(3) * euclidean((X - Y) / length_scale) + # where g = exp(-sqrt(3) * euclidean((X - Y) / length_scale)) + sqrt_3_dist = sqrt(3) * dist + f = np.expand_dims(1 + sqrt_3_dist, axis=1) + + # When all of x_i equals y_i, f equals 1.0, (1 - f) equals + # zero, hence from below + # f * g_grad + g * f_grad (where g_grad = -g * f_grad) + # -f * g * f_grad + g * f_grad + # g * f_grad * (1 - f) equals zero. + # sqrt_3_by_dist can be set to any value since diff equals + # zero for this corner case. 
+ sqrt_3_by_dist = np.zeros_like(dist) + nzd = dist != 0.0 + sqrt_3_by_dist[nzd] = sqrt(3) / dist[nzd] + dist_expand = np.expand_dims(sqrt_3_by_dist, axis=1) + + f_grad = diff / length_scale + f_grad *= dist_expand + + sqrt_3_dist *= -1 + exp_sqrt_3_dist = np.exp(sqrt_3_dist, sqrt_3_dist) + g = np.expand_dims(exp_sqrt_3_dist, axis=1) + g_grad = -g * f_grad + + # f * g_grad + g * f_grad (where g_grad = -g * f_grad) + f *= -1 + f += 1 + return g * f_grad * f + + elif self.nu == 2.5: + # grad(fg) = f'g + fg' + # where f = (1 + sqrt(5) * euclidean((X - Y) / length_scale) + + # 5 / 3 * sqeuclidean((X - Y) / length_scale)) + # where g = exp(-sqrt(5) * euclidean((X - Y) / length_scale)) + sqrt_5_dist = sqrt(5) * dist + f2 = (5.0 / 3.0) * dist_sq + f2 += sqrt_5_dist + f2 += 1 + f = np.expand_dims(f2, axis=1) + + # For i in [0, D) if x_i equals y_i + # f = 1 and g = 1 + # Grad = f'g + fg' = f' + g' + # f' = f_1' + f_2' + # Also g' = -g * f1' + # Grad = f'g - g * f1' * f + # Grad = g * (f' - f1' * f) + # Grad = f' - f1' + # Grad = f2' which equals zero when x = y + # Since for this corner case, diff equals zero, + # dist can be set to anything. + nzd_mask = dist != 0.0 + nzd = dist[nzd_mask] + dist[nzd_mask] = np.reciprocal(nzd, nzd) + + dist *= sqrt(5) + dist = np.expand_dims(dist, axis=1) + diff /= length_scale + f1_grad = dist * diff + f2_grad = (10.0 / 3.0) * diff + f_grad = f1_grad + f2_grad + + sqrt_5_dist *= -1 + g = np.exp(sqrt_5_dist, sqrt_5_dist) + g = np.expand_dims(g, axis=1) + g_grad = -g * f1_grad + return f * g_grad + g * f_grad + + +class RationalQuadratic(Kernel, sk_RationalQuadratic): + def gradient_x(self, x, X_train): + x = np.asarray(x) + X_train = np.asarray(X_train) + alpha = self.alpha + length_scale = self.length_scale + + # diff = (x - X_train) / length_scale + # size = (n_train_samples, n_dimensions) + diff = x - X_train + diff /= length_scale + + # dist = -(1 + (\sum_{i=1}^d (diff^2) / (2 * alpha)))** (-alpha - 1) + # size = (n_train_samples,) + scaled_dist = np.sum(diff**2, axis=1) + scaled_dist /= 2 * self.alpha + scaled_dist += 1 + scaled_dist **= -alpha - 1 + scaled_dist *= -1 + + scaled_dist = np.expand_dims(scaled_dist, axis=1) + diff_by_ls = diff / length_scale + return scaled_dist * diff_by_ls + + +class ExpSineSquared(Kernel, sk_ExpSineSquared): + def gradient_x(self, x, X_train): + x = np.asarray(x) + X_train = np.asarray(X_train) + length_scale = self.length_scale + periodicity = self.periodicity + + diff = x - X_train + sq_dist = np.sum(diff**2, axis=1) + dist = np.sqrt(sq_dist) + + pi_by_period = dist * (np.pi / periodicity) + sine = np.sin(pi_by_period) / length_scale + sine_squared = -2 * sine**2 + exp_sine_squared = np.exp(sine_squared) + + grad_wrt_exp = -2 * np.sin(2 * pi_by_period) / length_scale**2 + + # When x_i -> y_i for all i in [0, D), the gradient becomes + # zero. See https://github.com/MechCoder/Notebooks/blob/master/ExpSineSquared%20Kernel%20gradient%20computation.ipynb + # for a detailed math explanation + # grad_wrt_theta can be anything since diff is zero + # for this corner case, hence we set to zero. 
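+        # Chain-rule reference for the non-degenerate case, with d = ||x - y||:
+        #   k = exp(-2 * sin(pi * d / periodicity)**2 / length_scale**2)
+        #   dk/dx = k * (-2 * sin(2 * pi * d / periodicity) / length_scale**2)
+        #           * (pi / periodicity) * (x - y) / d
+        # i.e. exactly the product of the factors computed above and below.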
+ grad_wrt_theta = np.zeros_like(dist) + nzd = dist != 0.0 + grad_wrt_theta[nzd] = np.pi / (periodicity * dist[nzd]) + return np.expand_dims(grad_wrt_theta * exp_sine_squared * grad_wrt_exp, axis=1) * diff + + +class ConstantKernel(Kernel, sk_ConstantKernel): + def gradient_x(self, x, X_train): + return np.zeros_like(X_train) + + +class WhiteKernel(Kernel, sk_WhiteKernel): + def gradient_x(self, x, X_train): + return np.zeros_like(X_train) + + +class Exponentiation(Kernel, sk_Exponentiation): + def gradient_x(self, x, X_train): + x = np.asarray(x) + X_train = np.asarray(X_train) + expo = self.exponent + kernel = self.kernel + + K = np.expand_dims(kernel(np.expand_dims(x, axis=0), X_train)[0], axis=1) + return expo * K ** (expo - 1) * kernel.gradient_x(x, X_train) + + +class Sum(Kernel, sk_Sum): + def gradient_x(self, x, X_train): + return self.k1.gradient_x(x, X_train) + self.k2.gradient_x(x, X_train) + + +class Product(Kernel, sk_Product): + def gradient_x(self, x, X_train): + x = np.asarray(x) + x = np.expand_dims(x, axis=0) + X_train = np.asarray(X_train) + f_ggrad = np.expand_dims(self.k1(x, X_train)[0], axis=1) * self.k2.gradient_x(x, X_train) + fgrad_g = np.expand_dims(self.k2(x, X_train)[0], axis=1) * self.k1.gradient_x(x, X_train) + return f_ggrad + fgrad_g + + +class DotProduct(Kernel, sk_DotProduct): + def gradient_x(self, x, X_train): + return np.asarray(X_train) + + +class HammingKernel(sk_StationaryKernelMixin, sk_NormalizedKernelMixin, Kernel): + r""" + The HammingKernel is used to handle categorical inputs. + + ``K(x_1, x_2) = exp(\sum_{j=1}^{d} -ls_j * (I(x_1j != x_2j)))`` + + Parameters + ----------- + * `length_scale` [float, array-like, shape=[n_features,], 1.0 (default)] + The length scale of the kernel. If a float, an isotropic kernel is + used. If an array, an anisotropic kernel is used where each dimension + of l defines the length-scale of the respective feature dimension. + + * `length_scale_bounds` [array-like, [1e-5, 1e5] (default)] + The lower and upper bound on length_scale + """ + + def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)): + self.length_scale = length_scale + self.length_scale_bounds = length_scale_bounds + + @property + def hyperparameter_length_scale(self): + length_scale = self.length_scale + anisotropic = np.iterable(length_scale) and len(length_scale) > 1 + if anisotropic: + return Hyperparameter("length_scale", "numeric", self.length_scale_bounds, len(length_scale)) + return Hyperparameter("length_scale", "numeric", self.length_scale_bounds) + + def __call__(self, X, Y=None, eval_gradient=False): + """Return the kernel k(X, Y) and optionally its gradient. + + Parameters + ---------- + * `X` [array-like, shape=(n_samples_X, n_features)] + Left argument of the returned kernel k(X, Y) + + * `Y` [array-like, shape=(n_samples_Y, n_features) or None(default)] + Right argument of the returned kernel k(X, Y). If None, k(X, X) + if evaluated instead. + + * `eval_gradient` [bool, False(default)] + Determines whether the gradient with respect to the kernel + hyperparameter is determined. Only supported when Y is None. + + Returns + ------- + * `K` [array-like, shape=(n_samples_X, n_samples_Y)] + Kernel k(X, Y) + + * `K_gradient` [array-like, shape=(n_samples_X, n_samples_X, n_dims)] + The gradient of the kernel k(X, X) with respect to the + hyperparameter of the kernel. Only returned when eval_gradient + is True. 
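+
+        Notes
+        -----
+        Quick sanity check (illustrative values, not taken from the tests):
+        with the default isotropic ``length_scale=1.0`` and
+        ``X = [["a", "b"], ["a", "c"]]``, the off-diagonal entries of the
+        returned ``K`` equal ``exp(-1)``, because the two rows differ in
+        exactly one of their two positions.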
+ """ + length_scale = self.length_scale + anisotropic = np.iterable(length_scale) and len(length_scale) > 1 + + if np.iterable(length_scale): + if len(length_scale) > 1: + length_scale = np.asarray(length_scale, dtype=float) + else: + length_scale = float(length_scale[0]) + else: + length_scale = float(length_scale) + + X = np.atleast_2d(X) + if anisotropic and X.shape[1] != len(length_scale): + raise ValueError("Expected X to have %d features, got %d" % (len(length_scale), X.shape[1])) + + n_samples, n_dim = X.shape + + Y_is_None = Y is None + if Y_is_None: + Y = X + elif eval_gradient: + raise ValueError("gradient can be evaluated only when Y != X") + else: + Y = np.atleast_2d(Y) + + indicator = np.expand_dims(X, axis=1) != Y + kernel_prod = np.exp(-np.sum(length_scale * indicator, axis=2)) + + # dK / d theta = (dK / dl) * (dl / d theta) + # theta = log(l) => dl / d (theta) = e^theta = l + # dK / d theta = l * dK / dl + + # dK / dL computation + if anisotropic: + grad = -np.expand_dims(kernel_prod, axis=-1) * np.array(indicator, dtype=np.float32) + else: + grad = -np.expand_dims(kernel_prod * np.sum(indicator, axis=2), axis=-1) + + grad *= length_scale + if eval_gradient: + return kernel_prod, grad + return kernel_prod diff --git a/example/example_BO.py b/example/example_BO.py index d68e1b5..1dd419a 100644 --- a/example/example_BO.py +++ b/example/example_BO.py @@ -1,52 +1,47 @@ - import sys -import numpy as np -from sklearn.gaussian_process import GaussianProcessRegressor - -from bayes_optim import RandomForest, BO, GaussianProcess - sys.path.insert(0, "./") -from bayes_optim.extension import PCABO, RealSpace, KernelPCABO -from bayes_optim.mylogging import eprintf -import benchmark.bbobbenchmarks as bn import random +import benchmark.bbobbenchmarks as bn +import numpy as np +from bayes_optim import BO +from bayes_optim.extension import RealSpace +from bayes_optim.mylogging import eprintf -SEED = int(sys.argv[1]) -random.seed(SEED) -np.random.seed(SEED) -dim = 2 +# from sklearn.gaussian_process import GaussianProcessRegressor + + +# SEED = int(sys.argv[1]) +# random.seed(SEED) +# np.random.seed(SEED) +dim = 20 lb, ub = -5, 5 -OBJECTIVE_FUNCTION = bn.F17() +OBJECTIVE_FUNCTION = bn.F21() + def fitness(x): - # x = np.asarray(x) - # return np.sum((np.arange(1, dim + 1) * x) ** 2) - # eprintf("Evaluated solution:", x, "type", type(x)) if type(x) is np.ndarray: x = x.tolist() - return OBJECTIVE_FUNCTION(np.array(x)) - - -space = RealSpace([lb, ub], random_seed=SEED) * dim -eprintf("new call to PCABO") -model = GaussianProcess( - domain=space, - n_obj=1, - n_restarts_optimizer=dim, -) -opt = BO( - search_space=space, - obj_fun=fitness, - DoE_size=5, - max_FEs=40, - verbose=True, - n_point=1, - model=model, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, -) - -print(opt.run()) + return OBJECTIVE_FUNCTION(np.array(x)) - OBJECTIVE_FUNCTION.fopt + + +res = [] +for i in range(10): + space = RealSpace([lb, ub], random_seed=i) * dim + eprintf("new call to PCABO") + opt = BO( + search_space=space, + obj_fun=fitness, + DoE_size=3 * dim, + n_point=1, + random_seed=i, + data_file=f"test{i}.csv", + acquisition_optimization={"optimizer": "BFGS"}, + max_FEs=150, + verbose=True, + ) + opt.run() + res += [opt.xopt.fitness] diff --git a/example/example_KernelPCABO.py b/example/example_KernelPCABO.py index a9d6373..ded9d49 100644 --- a/example/example_KernelPCABO.py +++ b/example/example_KernelPCABO.py @@ -1,3 +1,7 @@ +import random +import benchmark.bbobbenchmarks as bn +from 
bayes_optim.mylogging import eprintf +from bayes_optim.extension import PCABO, RealSpace, KernelPCABO, KernelFitStrategy import sys import numpy as np @@ -7,12 +11,6 @@ sys.path.insert(0, "./") -from bayes_optim.extension import PCABO, RealSpace, KernelPCABO, KernelFitStrategy -from bayes_optim.mylogging import eprintf - -import benchmark.bbobbenchmarks as bn -import random - SEED = int(sys.argv[1]) random.seed(SEED) @@ -22,26 +20,29 @@ OBJECTIVE_FUNCTION = bn.F21() -def fitness(x): -# x = np.asarray(x) -# return np.sum((np.arange(1, dim + 1) * x) ** 2) +def func(x): + # x = np.asarray(x) + # return np.sum((np.arange(1, dim + 1) * x) ** 2) return OBJECTIVE_FUNCTION(x) space = RealSpace([lb, ub], random_seed=SEED) * dim +doe_size = 5 +total_budget = 40 +dim = 2 eprintf("new call to Kernel PCABO") opt = KernelPCABO( search_space=space, - obj_fun=fitness, - DoE_size=5, - max_FEs=40, + obj_fun=func, + DoE_size=doe_size, + max_FEs=total_budget, verbose=True, n_point=1, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - max_information_loss = 0.01, - kernel_fit_strategy = KernelFitStrategy.AUTO + acquisition_optimization={"optimizer": "BFGS"}, + max_information_loss=0.5, + kernel_fit_strategy=KernelFitStrategy.AUTO, + NN=dim, + random_seed=SEED ) - print(opt.run()) - diff --git a/example/example_gpr.py b/example/example_gpr.py new file mode 100644 index 0000000..a743ce1 --- /dev/null +++ b/example/example_gpr.py @@ -0,0 +1,41 @@ +import sys + +sys.path.insert(0, "./") + +import benchmark.bbobbenchmarks as bn +import numpy as np +from bayes_optim.surrogate.gaussian_process import ConstantKernel, GaussianProcess, Matern + +dim = 20 +f = bn.F21() + +np.random.seed(42) + +# X = np.random.rand(200, 20) +# y = np.array([f(x) - f.fopt for x in X]) +# X = X * 10 - 5 + +npzfile = np.load("data.npz") +X, y = npzfile["X"], npzfile["y"] + +cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0)) +# only special if *all* dimensions are categorical +other_kernel = Matern(length_scale=np.ones(dim), length_scale_bounds=[(0.01, 100)] * dim, nu=2.5) + +base_estimator = GaussianProcess( + kernel=cov_amplitude * other_kernel, + normalize_y=True, + noise="gaussian", + n_restarts_optimizer=2, + random_state=np.random.randint(1000), + lb=np.array([0] * 20), + ub=np.array([1] * 20), +) +base_estimator.fit(X, y) +mu, std, mu_grad, std_grad = base_estimator.predict( + X[0:1], return_std=True, return_mean_grad=True, return_std_grad=True +) +print(mu) +print(std) +print(mu_grad) +print(std_grad) diff --git a/experiments/doe_info_extract.py b/experiments/doe_info_extract.py new file mode 100644 index 0000000..78e8ac6 --- /dev/null +++ b/experiments/doe_info_extract.py @@ -0,0 +1,89 @@ +#!/usr/bin/python3 + + +import argparse +import sys +import shutil +import os +import csv +import math +import json +import functools +import ast +from zipfile import ZipFile +import io + + +class Description: + def __init__(self, fid: int, iid: int, dim: int, seed: int): + self.fid = int(fid) + self.iid = int(iid) + self.dim = int(dim) + self.seed = int(seed) + + def __eq__(self, other): + if (isinstance(other, Description)): + return self.fid == other.fid and self.iid == other.iid and self.dim == other.dim and self.seed == other.seed + return False + + def __hash__(self): + return hash((self.fid, self.iid, self.dim, self.seed)) + + def __str__(self): + return f'F{self.fid}_I{self.iid}_D{self.dim}_Seed{self.seed}' + + +def process_stdout(stdout_file): + with open(stdout_file, 'r') as f: + lines = f.readlines() + beg = 
lines[0].find('{') + end = lines[0].rfind('}') + m = json.loads(lines[0][beg:end+1].replace('\'', '\"')) + result = Description(fid=m['fid'], iid=m['iid'], + dim=m['dim'], seed=m['seed']) + doe_size, beg, cnt = 0, 0, 0 + for line in lines: + if 'doe_size = ' in line: + doe_size = int(line.split('=')[1]) + if 'DoE_value DoE_point' in line: + beg = cnt + 1 + break + cnt += 1 + doe = [] + for line in lines[beg:beg+doe_size]: + value = float(line[:line.find('[')]) + x = ast.literal_eval(line[line.find('['):]) + x = [float(xi) for xi in x] + doe.append((x, value)) + return result, doe + + +def get_best(doe): + best_value, arg_best = float('inf'), [] + for x, v in doe: + if v < best_value: + best_value = v + arg_best = x + return (arg_best, best_value) + + +def add_logs_folder(folder_name): + m = {} + for f in os.listdir(folder_name): + if f.endswith('.out'): + description, doe = process_stdout(os.path.join(folder_name, f)) + m[description] = get_best(doe) + return m + + +def main(): + parser = argparse.ArgumentParser('Extract DoE info') + parser.add_argument('--log_paths', nargs='+', required=True, + help='Path to files with log from run in cluster') + args = parser.parse_args() + for log_path in args.log_paths: + add_logs_folder(log_path) + + +if __name__ == '__main__': + main() diff --git a/experiments/experiment_helpers.py b/experiments/experiment_helpers.py new file mode 100644 index 0000000..c7ba6a5 --- /dev/null +++ b/experiments/experiment_helpers.py @@ -0,0 +1,216 @@ +from functools import partial +import random +from maria_laura.wrapper import marialaura as create_marialaura_alg +from my_logger import MyIOHFormatOnEveryEvaluationLogger, MyObjectiveFunctionWrapper +from bayes_optim.surrogate import GaussianProcess, trend +from bayes_optim.extension import RealSpace, KernelPCABO1, KernelPCABO, KernelFitStrategy, PCABO, BO +import sys +import os +import json +from ioh import Experiment, get_problem, logger, problem, OptimizationType +from bayes_optim.acquisition import OnePlusOne_Cholesky_CMA +import numpy as np +import copy +import time +from datetime import timedelta +from pycma.cma.evolution_strategy import fmin2 as pycmaes + + +lb, ub = -5, 5 +cnt = -1 +row_function = None + + +class Py_CMA_ES_Wrapper: + def __init__(self, func, dim, total_budget, seed, doe_arg_best, doe_best): + self.func = func + self.dim = dim + self.total_budget = total_budget + self.seed = seed + self.doe_arg_best = doe_arg_best + self.doe_best = doe_best + + def run(self): + space = RealSpace([lb, ub], random_seed=self.seed) * self.dim + assert abs(row_function(self.doe_arg_best) - self.doe_best) < 0.00001 + print(f' CMA starting point is {self.doe_arg_best}') + pycmaes(self.func, self.doe_arg_best, (ub - lb) / 4, eval_initial_x=True, options={'bounds': [ + [lb]*self.dim, [ub]*self.dim], 'maxfevals': self.total_budget, 'seed': self.seed}) + + +def create_algorithm(optimizer_name, func, dim, total_budget, doe_size, seed, **kwargs): + space = RealSpace([lb, ub], random_seed=seed) * dim + global cnt + cnt += 1 + if optimizer_name == 'KernelPCABOCheat': + return KernelPCABO1( + search_space=space, + obj_fun=func, + DoE_size=doe_size, + max_FEs=total_budget, + verbose=False, + n_point=1, + acquisition_optimization={"optimizer": "BFGS"}, + max_information_loss=0.1, + kernel_fit_strategy=KernelFitStrategy.AUTO, + NN=dim + ) + elif optimizer_name == 'KernelPCABOInverse': + return KernelPCABO( + search_space=space, + obj_fun=func, + DoE_size=doe_size, + max_FEs=total_budget, + verbose=False, + n_point=1, + 
acquisition_optimization={"optimizer": "BFGS"}, + max_information_loss=0.1, + kernel_fit_strategy=KernelFitStrategy.AUTO, + NN=dim, + random_seed=seed + ) + elif optimizer_name == 'LinearPCABO': + return PCABO( + search_space=space, + obj_fun=func, + DoE_size=doe_size, + max_FEs=total_budget, + verbose=False, + n_point=1, + n_components=0.90, + acquisition_optimization={"optimizer": "BFGS"}, + random_seed=seed + ) + elif optimizer_name == 'BO': + return BO( + search_space=space, + obj_fun=func, + DoE_size=doe_size, + n_point=1, + random_seed=seed, + data_file=f"test-{cnt}.csv", + acquisition_optimization={"optimizer": "BFGS"}, + max_FEs=total_budget, + verbose=False, + ) + elif optimizer_name == 'CMA_ES': + return OnePlusOne_Cholesky_CMA( + search_space=space, + obj_fun=func, + lb=lb, + ub=ub, + sigma0=40, + max_FEs=total_budget, + verbose=False, + random_seed=seed + ) + elif optimizer_name == 'SAASBO': + return create_saasbo( + optimizer_name='saasbo', + func=func, + ml_dim=dim, + ml_total_budget=total_budget, + ml_DoE_size=doe_size, + random_seed=seed + ) + elif optimizer_name == 'SKlearnBO': + return create_marialaura_alg( + optimizer_name='BO_sklearn', + func=func, + ml_dim=dim, + ml_total_budget=total_budget, + ml_DoE_size=doe_size, + random_seed=seed + ) + elif optimizer_name == 'pyCMA': + return Py_CMA_ES_Wrapper(func, dim, total_budget, seed, kwargs['doe_arg_best'], kwargs['doe_best']) + else: + raise NotImplementedError + + +class AlgorithmWrapper: + def __init__(self, seed): + self.opt = None + self.seed = seed + + @staticmethod + def __fitness_function_wrapper(x, f): + if type(x) is np.ndarray: + x = x.tolist() + return f(x) + + @staticmethod + def create_fitness(my_function): + return partial(AlgorithmWrapper.__fitness_function_wrapper, f=my_function) + + def __call__(self, optimizer_name, f, fid, iid, dim, **kwargs): + self.dim = dim + self.optimizer_name = optimizer_name + func = partial(AlgorithmWrapper.__fitness_function_wrapper, f=f) + doe_size = self.dim + if self.dim == 10: + total_budget = 250 + elif self.dim == 20: + total_budget = 120 + elif self.dim == 40: + total_budget = 200 + elif self.dim == 60: + total_budget = 300 + else: + total_budget = 2 * doe_size + self.opt = create_algorithm( + optimizer_name, func, self.dim, total_budget, doe_size, self.seed, **kwargs) + print(f' total_budget = {total_budget}') + print(f' doe_size = {doe_size}') + sys.stdout.flush() + self.opt.run() + + @property + def lower_space_dim(self) -> int: + if self.optimizer_name == 'BO': + return self.dim + return self.opt.get_lower_space_dimensionality() + + @property + def extracted_information(self) -> float: + if self.optimizer_name == 'BO': + return 1.0 + return self.opt.get_extracted_information() + + @property + def kernel_config(self) -> str: + return self.opt._pca.get_kernel_parameters() + + @property + def out_of_the_box_solutions(self) -> int: + return self.opt.out_solutions + + @property + def acq_opt_time(self) -> float: + return self.opt.acq_opt_time + + @property + def model_fit_time(self) -> float: + return self.opt.mode_fit_time + + +def run_particular_experiment(my_optimizer_name, fid, iid, dim, rep, folder_name, **kwargs): + algorithm = AlgorithmWrapper(rep) + l = MyIOHFormatOnEveryEvaluationLogger( + folder_name=folder_name, algorithm_name=my_optimizer_name) + print(f' Logging to the folder {l.folder_name}') + sys.stdout.flush() + l.watch(algorithm, ['lower_space_dim', 'extracted_information', + 'out_of_the_box_solutions', 'kernel_config', 'acq_opt_time', 'model_fit_time']) 
+ p = MyObjectiveFunctionWrapper(fid, iid, dim) + global row_function + row_function = p.my_function + p.attach_logger(l) + algorithm(my_optimizer_name, p, fid, iid, dim, **kwargs) + l.finish_logging() + + +def validate_optimizers(optimizers): + for optimizer in optimizers: + create_algorithm(optimizer, lambda x: 1, 10, 10, 10, + 0, doe_arg_best=[0]*10, doe_best=0) diff --git a/experiments/my_logger.py b/experiments/my_logger.py index 651a4a2..547ee98 100644 --- a/experiments/my_logger.py +++ b/experiments/my_logger.py @@ -11,6 +11,7 @@ def __init__(self, folder_name='TMP', algorithm_name='UNKNOWN', suite='unkown su self.algorithm_info = algorithm_info self.suite = suite self.create_time = time.time() + self.extra_info_getters = [] @staticmethod def __generate_dir_name(name, x=0): @@ -27,20 +28,28 @@ def watch(self, algorithm, extra_data): self.algorithm = algorithm self.extra_info_getters = extra_data + def __log_to_meta_file(self, time_taken=None): + if time_taken is None: + time_taken = time.time() - self.create_time + with open(self.log_info_path, 'w') as f: + f.write(f'suite = \"{self.suite}\", funcId = {self.fid}, funcName = \"{self.func_name}\", DIM = {self.dim}, maximization = \"F\", algId = \"{self.algorithm_name}\", algInfo = \"{self.algorithm_info}\"\n') + f.write('%\n') + f.write(f'{self.log_file_path}, {self.first_line}:{self.last_line}|{time_taken}\n') + def _set_up_logger(self, fid, iid, dim, func_name): self.log_info_path = f'{self.folder_name}/IOHprofiler_f{fid}_{func_name}.info' - with open(self.log_info_path, 'a') as f: - f.write(f'suite = \"{self.suite}\", funcId = {fid}, funcName = \"{func_name}\", DIM = {dim}, maximization = \"F\", algId = \"{self.algorithm_name}\", algInfo = \"{self.algorithm_info}\"\n') + self.fid, self.func_name, self.dim = fid, func_name, dim self.log_file_path = f'data_f{fid}_{func_name}/IOHprofiler_f{fid}_DIM{dim}.dat' self.log_file_full_path = f'{self.folder_name}/{self.log_file_path}' os.makedirs(os.path.dirname(self.log_file_full_path), exist_ok=True) self.first_line = 0 self.last_line = 0 - with open(self.log_file_full_path, 'a') as f: + with open(self.log_file_full_path, 'w') as f: f.write('\"function evaluation\" \"current f(x)\" \"best-so-far f(x)\" \"current af(x)+b\" \"best af(x)+b\"') for extra_info in self.extra_info_getters: f.write(f' {extra_info}') f.write('\n') + self.__log_to_meta_file() def log(self, cur_evaluation, cur_fitness, best_so_far): with open(self.log_file_full_path, 'a') as f: @@ -53,13 +62,10 @@ def log(self, cur_evaluation, cur_fitness, best_so_far): f.write(f' {extra_info}') f.write('\n') self.last_line += 1 + self.__log_to_meta_file() - def finish_logging(self): - time_taken = time.time() - self.create_time - with open(self.log_info_path, 'a') as f: - f.write('%\n') - f.write(f'{self.log_file_path}, {self.first_line}:{self.last_line}|{time_taken}\n') - + def finish_logging(self, time_taken=None): + self.__log_to_meta_file(time_taken) class MyObjectiveFunctionWrapper: def __init__(self, fid, iid, dim, directed_by='Hao'): diff --git a/experiments/rec_walk.py b/experiments/rec_walk.py index 0aa858a..0234ab4 100755 --- a/experiments/rec_walk.py +++ b/experiments/rec_walk.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import sys +import shutil import os @@ -24,7 +25,7 @@ def cat(from_file, to_file): outfile.write(line) -def process_file(dat_file): +def process_dat_file(dat_file): tmp_file_path = os.path.join(os.path.dirname(dat_file), 'tmp') remove_kernel_info(dat_file, tmp_file_path) cat(tmp_file_path, dat_file) @@ -48,10 
+49,12 @@ def main(argv): for results_folder in resuls_folders: for root, d_names, f_names in os.walk(results_folder): for file in f_names: + file_path = os.path.join(root, file) if file.startswith('IOH') and file.endswith('.info'): - process_info_file(os.path.join(root, file)) + process_info_file(file_path) + if file.startswith('IOH') and file.endswith('.dat'): + process_dat_file(file_path) - import shutil for d in incomplete_results: print(d) diff --git a/experiments/run_local_experiment.py b/experiments/run_local_experiment.py index 8cb7778..713fdae 100644 --- a/experiments/run_local_experiment.py +++ b/experiments/run_local_experiment.py @@ -14,160 +14,22 @@ import random from functools import partial - - -MY_EXPEREMENT_FOLDER = "TMP" -fids = [21] -iids = [0] -dims = [10] -reps = 5 -problem_type = 'BBOB' -optimizers = sys.argv[1:] -seed = 0 -random.seed(seed) -np.random.seed(seed) -lb, ub = -5, 5 - - -def create_algorithm(optimizer_name, func, dim, total_budget, doe_size): - space = RealSpace([lb, ub], random_seed=seed) * dim - if optimizer_name == 'KernelPCABOCheat': - return KernelPCABO1( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - max_information_loss=0.1, - kernel_fit_strategy=KernelFitStrategy.FIXED_KERNEL, - kernel_config={'kernel_name': 'rbf', 'kernel_parameters': {'gamma': 0.05}}, - NN=dim - ) - elif optimizer_name == 'KernelPCABOInverse': - return KernelPCABO( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - max_information_loss = 0.1, - kernel_fit_strategy = KernelFitStrategy.AUTO, - NN=dim - ) - elif optimizer_name == 'LinearPCABO': - return PCABO( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - n_components=0.9, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - ) - elif optimizer_name == 'BO': - bounds = np.asarray([(lb, ub)]*dim) - return BO( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - random_seed=seed, - model=GaussianProcess( - mean=trend.constant_trend(dim), - corr="matern", - thetaL=1e-3 * (bounds[:, 1] - bounds[:, 0]), - thetaU=1e3 * (bounds[:, 1] - bounds[:, 0]), - nugget=1e-6, - noise_estim=False, - optimizer="BFGS", - wait_iter=3, - random_start=max(10, dim), - likelihood="concentrated", - eval_budget=100 * dim, - ), - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - ) - elif optimizer_name == 'CMA_ES': - return OnePlusOne_Cholesky_CMA( - search_space=space, - obj_fun=func, - lb=lb, - ub=ub, - sigma0=40, - max_FEs=total_budget, - verbose=False, - random_seed=seed - ) - else: - raise NotImplementedError - - -def validate_optimizers(optimizers): - for optimizer in optimizers: - create_algorithm(optimizer, lambda x: 1, 10, 10, 10) - - -class AlgorithmWrapper: - def __init__(self): - self.opt = None - - @staticmethod - def __fitness_function_wrapper(x, f): - if type(x) is np.ndarray: - x = x.tolist() - return f(x) - - @staticmethod - def create_fitness(my_function): - return partial(AlgorithmWrapper.__fitness_function_wrapper, f=my_function) - - def __call__(self, optimizer_name, f, fid, iid, dim): - self.dim = dim - self.optimizer_name = optimizer_name - func = partial(AlgorithmWrapper.__fitness_function_wrapper, f=f) - 
total_budget = 50 + 10 * self.dim - doe_size = 3 * self.dim - self.opt = create_algorithm(optimizer_name, func, self.dim, total_budget, doe_size) - self.opt.run() - - @property - def lower_space_dim(self) -> int: - if self.optimizer_name == 'BO': - return self.dim - return self.opt.get_lower_space_dimensionality() - - @property - def extracted_information(self) -> float: - if self.optimizer_name == 'BO': - return 1.0 - return self.opt.get_extracted_information() - - @property - def kernel_config(self) -> float: - return self.opt._pca.get_kernel_parameters() - - -def run_particular_experiment(my_optimizer_name, fid, iid, dim, rep): - global seed - seed = rep - algorithm = AlgorithmWrapper() - l = MyIOHFormatOnEveryEvaluationLogger(folder_name=MY_EXPEREMENT_FOLDER, algorithm_name=my_optimizer_name) - print(f' Logging to the folder {l.folder_name}') - l.watch(algorithm, ['lower_space_dim', 'extracted_information', 'kernel_config']) - p = MyObjectiveFunctionWrapper(fid, iid, dim) - p.attach_logger(l) - algorithm(my_optimizer_name, p, fid, iid, dim) - l.finish_logging() +import json +from experiment_helpers import run_particular_experiment, validate_optimizers def run_experiment(): + if len(sys.argv) == 1: + print('No configs given') + return + with open(sys.argv[1], 'r') as f: + config = json.load(f) + result_folder_prefix = config['folder'] + fids = config['fids'] + iids = config['iids'] + dims = config['dims'] + reps = config['reps'] + optimizers = config['optimizers'] validate_optimizers(optimizers) runs_number = len(optimizers) * len(fids) * len(iids) * len(dims) * reps cur_run_number = 1 @@ -178,7 +40,7 @@ def run_experiment(): for rep in range(reps): print(f'Run {cur_run_number} out of {runs_number}, Algorithm {my_optimizer_name}, Problem {fid}, Instance {iid}, Dimension {dim}, Repetition {rep+1} ...') start = time.time() - run_particular_experiment(my_optimizer_name, fid, iid, dim, rep) + run_particular_experiment(my_optimizer_name, fid, iid, dim, rep, result_folder_prefix) end = time.time() print(f' Done in {end - start} secs') cur_run_number += 1 diff --git a/experiments/single_experiment.py b/experiments/single_experiment.py index adb612c..13fe41f 100644 --- a/experiments/single_experiment.py +++ b/experiments/single_experiment.py @@ -1,6 +1,6 @@ from functools import partial import random -from maria_laura.wrapper import marialaura as create_saasbo +from maria_laura.wrapper import marialaura as create_marialaura_alg from my_logger import MyIOHFormatOnEveryEvaluationLogger, MyObjectiveFunctionWrapper from bayes_optim.surrogate import GaussianProcess, trend from bayes_optim.extension import RealSpace, KernelPCABO1, KernelPCABO, KernelFitStrategy, PCABO, BO @@ -13,163 +13,7 @@ import copy import time from datetime import timedelta - -sys.path.insert(0, "./") - - -MY_EXPEREMENT_FOLDER = "TMP" -seed = 0 -lb, ub = -5, 5 - - -def create_algorithm(optimizer_name, func, dim, total_budget, doe_size): - space = RealSpace([lb, ub], random_seed=seed) * dim - if optimizer_name == 'KernelPCABOCheat': - return KernelPCABO1( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - max_information_loss=0.1, - kernel_fit_strategy=KernelFitStrategy.AUTO, - NN=dim - ) - elif optimizer_name == 'KernelPCABOInverse': - return KernelPCABO( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - 
acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - max_information_loss=0.1, - kernel_fit_strategy=KernelFitStrategy.AUTO, - NN=dim - ) - elif optimizer_name == 'LinearPCABO': - return PCABO( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - n_components=0.95, - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - ) - elif optimizer_name == 'BO': - bounds = np.asarray([(lb, ub)]*dim) - return BO( - search_space=space, - obj_fun=func, - DoE_size=doe_size, - max_FEs=total_budget, - verbose=False, - n_point=1, - random_seed=seed, - model=GaussianProcess( - mean=trend.constant_trend(dim), - corr="matern", - thetaL=1e-3 * (bounds[:, 1] - bounds[:, 0]), - thetaU=1e3 * (bounds[:, 1] - bounds[:, 0]), - nugget=1e-6, - noise_estim=False, - optimizer="BFGS", - wait_iter=3, - random_start=max(10, dim), - likelihood="concentrated", - eval_budget=100 * dim, - ), - acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"}, - ) - elif optimizer_name == 'CMA_ES': - return OnePlusOne_Cholesky_CMA( - search_space=space, - obj_fun=func, - lb=lb, - ub=ub, - sigma0=40, - max_FEs=total_budget, - verbose=False, - random_seed=seed - ) - elif optimizer_name == 'SAASBO': - return create_saasbo( - optimizer_name='saasbo', - func=func, - ml_dim=dim, - ml_total_budget=total_budget, - ml_DoE_size=doe_size, - random_seed=seed - ) - else: - raise NotImplementedError - - -def validate_optimizers(optimizers): - for optimizer in optimizers: - create_algorithm(optimizer, lambda x: 1, 10, 10, 10) - - -class AlgorithmWrapper: - def __init__(self): - self.opt = None - - @staticmethod - def __fitness_function_wrapper(x, f): - if type(x) is np.ndarray: - x = x.tolist() - return f(x) - - @staticmethod - def create_fitness(my_function): - return partial(AlgorithmWrapper.__fitness_function_wrapper, f=my_function) - - def __call__(self, optimizer_name, f, fid, iid, dim): - self.dim = dim - self.optimizer_name = optimizer_name - func = partial(AlgorithmWrapper.__fitness_function_wrapper, f=f) - total_budget = 50 + 10 * self.dim - doe_size = 3 * self.dim - self.opt = create_algorithm( - optimizer_name, func, self.dim, total_budget, doe_size) - self.opt.run() - - @property - def lower_space_dim(self) -> int: - if self.optimizer_name == 'BO': - return self.dim - return self.opt.get_lower_space_dimensionality() - - @property - def extracted_information(self) -> float: - if self.optimizer_name == 'BO': - return 1.0 - return self.opt.get_extracted_information() - - @property - def kernel_config(self) -> float: - return self.opt._pca.get_kernel_parameters() - - -def run_particular_experiment(my_optimizer_name, fid, iid, dim, rep): - global seed - seed = rep - algorithm = AlgorithmWrapper() - l = MyIOHFormatOnEveryEvaluationLogger( - folder_name=MY_EXPEREMENT_FOLDER, algorithm_name=my_optimizer_name) - print(f' Logging to the folder {l.folder_name}') - sys.stdout.flush() - l.watch(algorithm, ['lower_space_dim', - 'extracted_information', 'kernel_config']) - p = MyObjectiveFunctionWrapper(fid, iid, dim) - p.attach_logger(l) - algorithm(my_optimizer_name, p, fid, iid, dim) - l.finish_logging() +from experiment_helpers import run_particular_experiment def run_experiment(): @@ -179,13 +23,13 @@ def run_experiment(): with open(sys.argv[1]) as f: m = json.load(f) print(f'Running with config {m} ...') - global MY_EXPEREMENT_FOLDER, lb, ub - MY_EXPEREMENT_FOLDER = m['folder'] - lb = m['lb'] - ub = m['ub'] + arg_best, best = [], 0. 
+ if 'doe_arg_best' in m.keys(): + arg_best = m['doe_arg_best'] + best = m['doe_best'] start = time.time() run_particular_experiment( - m['opt'], m['fid'], m['iid'], m['dim'], m['seed']) + m['opt'], m['fid'], m['iid'], m['dim'], m['seed'], m['folder'], doe_arg_best=arg_best, doe_best=best) end = time.time() sec = int(round(end - start)) x = str(timedelta(seconds=sec)).split(':') diff --git a/experiments/slurm_env_set_up.py b/experiments/slurm_env_set_up.py index bf78b1e..1197356 100644 --- a/experiments/slurm_env_set_up.py +++ b/experiments/slurm_env_set_up.py @@ -1,15 +1,17 @@ import sys +import argparse import os import json import datetime -from single_experiment import validate_optimizers +from doe_info_extract import Description, add_logs_folder +from experiment_helpers import validate_optimizers class ExperimentEnvironment: - SLURM_SCRIPT_TEMPLATE = '''#!/bin/env bash + HAO_SLURM_SCRIPT_TEMPLATE = '''#!/bin/env bash #SBATCH --job-name=##folder## -#SBATCH --array=##from_number##-##to_number## +#SBATCH --array=0-##jobs_count## #SBATCH --partition=cpu-long #SBATCH --mem-per-cpu=1G #SBATCH --time=7-00:00:00 @@ -20,18 +22,48 @@ class ExperimentEnvironment: #SBATCH --output=##logs_out## #SBATCH --error=##logs_err## -python ../single_experiment.py configs/${SLURM_ARRAY_TASK_ID}.json +num=##from_number## +FILE_ID=$((${SLURM_ARRAY_TASK_ID}+$num)) +python ../single_experiment.py configs/${FILE_ID}.json ''' - def __init__(self): + ELENA_SLURM_SCRIPT_TEMPLATE = '''#!/bin/bash + +#SBATCH --job-name=##folder## +#SBATCH --array=0-##jobs_count## +#SBATCH --clusters=serial +#SBATCH --partition=serial_long +#SBATCH --mem=512MB +#SBATCH --time=7-00:00:00 +#SBATCH --mail-user=kirant9797@gmail.com +#SBATCH --mail-type=END,FAIL +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --output=##logs_out## +#SBATCH --error=##logs_err## + +num=##from_number## +FILE_ID=$((${SLURM_ARRAY_TASK_ID}+$num)) +python ../single_experiment.py configs/${FILE_ID}.json +''' + + def __init__(self, whose_server): now = datetime.datetime.now() suffix = now.strftime('%d-%m-%Y_%Hh%Mm%Ss') folder_name = 'run_' + suffix os.makedirs(folder_name, exist_ok=False) print(f'Experiment root is: {folder_name}') self.experiment_root = os.path.abspath(folder_name) - self.__max_array_size = 1000 + if whose_server == 'Hao': + self.__max_array_size = 1000 + else: + self.__max_array_size = 100 self.__number_of_slurm_scripts = 0 + self.whose_server = whose_server + self.logs_with_doe = None + + def set_logs_with_doe(self, logs_with_doe): + self.logs_with_doe = logs_with_doe def set_up_by_experiment_config_file(self, experiment_config_file_name): self.__generate_configs(experiment_config_file_name) @@ -46,25 +78,29 @@ def __generate_slurm_script(self): self.__number_of_slurm_scripts = 0 logs_out = os.path.join(self.logs_folder, '%A_%a.out') logs_err = os.path.join(self.logs_folder, '%A_%a.err') - script = ExperimentEnvironment.SLURM_SCRIPT_TEMPLATE\ - .replace('##folder##', self.result_folder_prefix)\ - .replace('##logs_out##', logs_out)\ - .replace('##logs_err##', logs_err) + if self.whose_server == 'Hao': + script = ExperimentEnvironment.HAO_SLURM_SCRIPT_TEMPLATE + else: + script = ExperimentEnvironment.ELENA_SLURM_SCRIPT_TEMPLATE + script = script\ + .replace('##folder##', self.result_folder_prefix)\ + .replace('##logs_out##', logs_out)\ + .replace('##logs_err##', logs_err) offset = 0 for i in range(self.generated_configs // self.__max_array_size): with open(os.path.join(self.experiment_root, f'slurm{self.__number_of_slurm_scripts}.sh'), 
'w') as f: - f.write(script\ - .replace('##from_number##', str(offset))\ - .replace('##to_number##', str(offset + self.__max_array_size - 1))) + f.write(script + .replace('##from_number##', str(offset)) + .replace('##jobs_count##', str(self.__max_array_size - 1))) offset += self.__max_array_size self.__number_of_slurm_scripts += 1 r = self.generated_configs % self.__max_array_size if r > 0: with open(os.path.join(self.experiment_root, f'slurm{self.__number_of_slurm_scripts}.sh'), 'w') as f: - f.write(script\ - .replace('##from_number##', str(offset))\ - .replace('##to_number##', str(offset + r - 1))) - offset += self.__max_array_size + f.write(script + .replace('##from_number##', str(offset)) + .replace('##jobs_count##', str(r - 1))) + offset += r self.__number_of_slurm_scripts += 1 def __generate_configs(self, experiment_config_file_name): @@ -78,9 +114,14 @@ def __generate_configs(self, experiment_config_file_name): if 'extra' not in config.keys(): config['extra'] = '' optimizers = config['optimizers'] + if 'pyCMA' in optimizers: + if self.logs_with_doe is None: + raise ValueError(f'Logs with doe should be configured') + my_doe = add_logs_folder(self.logs_with_doe) lb, ub = config['lb'], config['ub'] validate_optimizers(optimizers) - runs_number = len(optimizers) * len(fids) * len(iids) * len(dims) * reps + runs_number = len(optimizers) * len(fids) * \ + len(iids) * len(dims) * reps cur_config_number = 0 configs_dir = os.path.join(self.experiment_root, 'configs') os.makedirs(configs_dir, exist_ok=False) @@ -93,15 +134,20 @@ def __generate_configs(self, experiment_config_file_name): # print(f'Ids for opt={my_optimizer_name}, fid={fid}, iid={iid}, dim={dim} are [{cur_config_number}, {cur_config_number+reps-1}]') for rep in range(reps): experiment_config = { - 'folder': f'{self.result_folder_prefix}_Opt-{my_optimizer_name}_F-{fid}_Dim-{dim}_Rep-{rep}_Id-{cur_config_number}', - 'opt': my_optimizer_name, - 'fid': fid, - 'iid': iid, - 'dim': dim, - 'seed': rep, - 'lb': lb, - 'ub': ub, - } + 'folder': f'{self.result_folder_prefix}_Opt-{my_optimizer_name}_F-{fid}_Dim-{dim}_Rep-{rep}_Id-{cur_config_number}', + 'opt': my_optimizer_name, + 'fid': fid, + 'iid': iid, + 'dim': dim, + 'seed': rep, + 'lb': lb, + 'ub': ub, + } + if my_optimizer_name == 'pyCMA': + arg_best, best = my_doe[Description( + fid=fid, iid=iid, dim=dim, seed=rep)] + experiment_config['doe_arg_best'] = arg_best + experiment_config['doe_best'] = best cur_config_file_name = f'{cur_config_number}.json' with open(os.path.join(configs_dir, cur_config_file_name), 'w') as f: json.dump(experiment_config, f) @@ -114,11 +160,21 @@ def print_helper(self): def main(argv): - env = ExperimentEnvironment() - env.set_up_by_experiment_config_file(argv[1]) + parser = argparse.ArgumentParser( + 'generate slurm environment with all the configurations') + parser.add_argument('config_file', type=str, + help='fqn of the file with global configuration of the experiment') + parser.add_argument('whose', type=str, + help='Whose cluster is it? Options: Elena, Hao') + parser.add_argument('--logs_with_doe', type=str, + help='Path to the folder with logs that contain information about does. 
This information is required for pyCMA', default=None) + args = parser.parse_args() + env = ExperimentEnvironment(args.whose) + if args.logs_with_doe is not None: + env.set_logs_with_doe(args.logs_with_doe) + env.set_up_by_experiment_config_file(args.config_file) env.print_helper() if __name__ == '__main__': main(sys.argv) - diff --git a/unittest/test_BO.py b/unittest/test_BO.py index 7f6d640..47bff61 100644 --- a/unittest/test_BO.py +++ b/unittest/test_BO.py @@ -94,7 +94,7 @@ def fitness(_): if var_type == "r": lb, ub = -1, 5 - space = RealSpace([lb, ub]) * dim + space = RealSpace([lb, ub], random_seed=0) * dim mean = trend.constant_trend(dim, beta=None) thetaL = 1e-10 * (ub - lb) * np.ones(dim) thetaU = 10 * (ub - lb) * np.ones(dim) diff --git a/unittest/test_kpca.py b/unittest/test_kpca.py index 4f7f365..47ae683 100644 --- a/unittest/test_kpca.py +++ b/unittest/test_kpca.py @@ -2,9 +2,11 @@ from bayes_optim.mylogging import eprintf from bayes_optim.extension import KernelTransform, KernelFitStrategy, RealSpace import pytest +import random def test_kpca(): + random.seed(0) X = [[6.888437030500963, 5.159088058806049], [-1.5885683833831, -4.821664994140733], [0.22549442737217085, -1.9013172509917133], [5.675971780695452, -3.933745478421451], [-0.4680609169528829, 1.6676407891006235]] X_weighted = [[1.079483540452395, 0.808478123348775], [-0.0, -0.0], [0.015436285850211205, -0.13015521900151383], [2.8024450976474777, -1.9422411099521695], [-0.1315704411634408, 0.4687685435317057]] Y = [35.441907931514024, 455.983619954143, 288.22967496622755, 26.86758082381718, 30.021247428418974] @@ -17,12 +19,12 @@ def test_kpca(): # eprintf("transformed point is", ys) points1 = kpca.inverse_transform(ys) for (point, point1) in zip(points, points1): - assert point == pytest.approx(point1, 0.01) + assert point == pytest.approx(point1, abs=0.1) def test_poly_kernel(): my_kernel = create_kernel('poly', {'gamma': 0.1,'d': 2,'c0': 0}) - assert my_kernel([1., 2.], [2., 3.]) == pytest.approx(0.64, 0.0001) + assert my_kernel([1., 2.], [2., 3.]) == pytest.approx(0.64, abs=0.0001) X = [[-3.5261636457420837, -2.351717984413572], [-0.7006502656326337, -4.566899291151256], [1.7729779622517245, 4.2261269157682655], [3.8998999798224556, -0.5553572234968236], [-2.729563653189096, 2.805196951058811], [4.832929712261667, 3.865893673749989], [3.9124322981507795, -4.971877090648892], [-0.1616436599443336, 0.11613330925482401], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [4.832929712261667, 3.865893673749989], [-4.867060615194007, 4.9945691484006876], [-1.9860986969950059, 3.2112837468619855], [-2.8723134299050783, 1.7676771760067167], [-1.4657214741455222, -3.692243533520105], [2.7708894654641236, 1.8538803354174291], [0.040778264675010334, 2.446797223084789], [-2.632665245745297, 2.3637546031531738], [-0.38638978135752566, 1.9949986329384561], [-4.999617881627624, -4.999350584219162], [-4.670272654869409, 1.7318585973129172], [0.2771155651877102, 3.5011898479365797], [-4.480953216322681, 2.893320596399356], [-3.425458722518958, 2.145437886908651], [-4.843765691130719, 4.855505489292614], [-1.341228249655169, 2.8575957979501982], [-2.784909143737248, 2.486004313421647], [-3.2770532373739147, 2.5342728135789905], [-3.5416175104634493, 2.6230400477720472], [-3.9900670493682697, 
2.7739614707582874], [-3.9357543738566023, 2.9198451835015398], [-3.389817127225266, 2.4732174256575163], [4.832929712261667, 3.865893673749989], [3.677950467762358, 1.0628481656179396], [-0.0250796466799299, -2.0651154234823688]] diff --git a/unittest/test_ord_container.py b/unittest/test_ord_container.py new file mode 100644 index 0000000..e92a914 --- /dev/null +++ b/unittest/test_ord_container.py @@ -0,0 +1,10 @@ +from bayes_optim.extension import KernelPCABO + +def test_simple(): + c = KernelPCABO.MyOrderedContainer() + for i in range(10): + c.add_element(i) + assert 1 == c.find_pos(0) + assert 0 == c.find_pos(-1) + assert 10 == c.find_pos(100) + assert 2 == c.find_pos(1)
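+
+
+# A hedged reading of the container's contract, inferred only from the asserts
+# above: find_pos(v) appears to return the number of stored elements that are
+# <= v, i.e. bisect.bisect_right over the sorted contents. The test below
+# checks that reading on distinct random integers; it is a sketch of the
+# assumed semantics, not a specification.
+def test_find_pos_matches_bisect_right():
+    import bisect
+    import random
+
+    random.seed(0)
+    values = random.sample(range(-30, 30), 20)
+    c = KernelPCABO.MyOrderedContainer()
+    sorted_values = []
+    for v in values:
+        c.add_element(v)
+        bisect.insort(sorted_values, v)
+    for q in range(-32, 32):
+        assert c.find_pos(q) == bisect.bisect_right(sorted_values, q)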