Multi-fidelity BO¶
Here we demonstrate how Multi-Fidelity Bayesian Optimization can be used to reduce the computational cost of optimization by using lower-fidelity surrogate models. The goal is to learn the functional dependence of the objective on the input variables at low fidelities (which are cheap to compute) and use that information to quickly find the best objective value at higher fidelities (which are more expensive to compute). This assumes that there is some learnable correlation between the objective values at different fidelities.
Xopt implements the MOMF (https://botorch.org/tutorials/Multi_objective_multi_fidelity_BO)
algorithm which can be used to solve both single (this notebook) and multi-objective
(see multi-objective BO section) multi-fidelity problems. Under the hood this
algorithm attempts to solve a multi-objective optimization problem, where one
objective is the function objective and the other is a simple fidelity objective,
weighted by the cost_function of evaluating the objective at a given fidelity.
from xopt.generators.bayesian import MultiFidelityGenerator
from xopt import Evaluator, Xopt
from xopt import VOCS
import os
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
# Ignore all warnings
import warnings
warnings.filterwarnings("ignore")
# Smoke-test switch: when the SMOKE_TEST environment variable is set to any
# non-empty string, use the smallest possible sample and restart counts.
SMOKE_TEST = os.environ.get("SMOKE_TEST")
if SMOKE_TEST:
    N_MC_SAMPLES = 1
    N_RESTARTS = 1
else:
    N_MC_SAMPLES = 128
    N_RESTARTS = 20
def test_function(input_dict):
    """Evaluate the toy multi-fidelity objective.

    Parameters
    ----------
    input_dict : dict
        Must contain ``"x"`` (the input value; a scalar or a NumPy array)
        and ``"s"`` (the fidelity value).

    Returns
    -------
    dict
        ``{"f": sin(x + (1 - s)) * exp((1 - s) / 2)}``. At ``s = 1`` this
        reduces to ``sin(x)``; lowering ``s`` shifts the phase and increases
        the amplitude.
    """
    x = input_dict["x"]
    s = input_dict["s"]
    return {"f": np.sin(x + (1.0 - s)) * np.exp((-s + 1) / 2)}


# The name starts with "test_", so tell pytest not to collect this objective
# as a test case if this code is ever imported into a test module.
test_function.__test__ = False
# Problem definition: one input variable "x" on [0, 2*pi] and one objective
# "f" that is to be minimized.
vocs = VOCS(
    variables={"x": [0, 2 * math.pi]},
    objectives={"f": "MINIMIZE"},
)
Plot the test function in input + fidelity space¶
# Evaluate the test function on a dense grid of x values at three fixed
# fidelities and overlay the curves, one labelled line per fidelity.
test_x = np.linspace(*vocs.bounds, 1000)
fidelities = [0.0, 0.5, 1.0]

fig, ax = plt.subplots()
for ele in fidelities:
    f = test_function({"x": test_x, "s": ele})["f"]
    ax.plot(test_x, f, label=f"s:{ele}")
# add the legend once, after all curves have been drawn
ax.legend()
<matplotlib.legend.Legend at 0x7f9747f8f380>
# create xopt object
# get and modify default generator options
generator = MultiFidelityGenerator(vocs=vocs)
generator.gp_constructor.use_low_noise_prior = True
# specify a custom cost function based on the fidelity parameter:
# the cost of one evaluation grows linearly with s, and the 0.001 offset
# keeps evaluations at s = 0 from being free
generator.cost_function = lambda s: s + 0.001
# optimizer restarts and Monte Carlo sample counts (reduced for smoke tests)
generator.numerical_optimizer.n_restarts = N_RESTARTS
generator.n_monte_carlo_samples = N_MC_SAMPLES
# wrap the test function in an evaluator and combine it with the generator
evaluator = Evaluator(function=test_function)
X = Xopt(generator=generator, evaluator=evaluator)
# display the resulting configuration
X
Xopt
________________________________
Version: 0.1.dev1+g6d6c7f050
Data size: 0
Config as YAML:
dump_file: null
evaluator:
function: __main__.test_function
function_kwargs: {}
max_workers: 1
vectorized: false
generator:
computation_time: null
custom_objective: null
fixed_features: null
gp_constructor:
covar_modules: {}
custom_noise_prior: null
mean_modules: {}
name: standard
train_config: null
train_kwargs: null
train_method: lbfgs
train_model: true
trainable_mean_keys: []
transform_inputs: true
use_cached_hyperparameters: false
use_low_noise_prior: true
max_travel_distances: null
model: null
n_candidates: 1
n_interpolate_points: null
n_monte_carlo_samples: 128
name: multi_fidelity
numerical_optimizer:
discrete_max_batch_size: 2048
discrete_max_choices: 4096
max_iter: 1000
max_time: 5.0
mixed_max_discrete_configurations: 512
n_restarts: 20
name: LBFGS
reference_point:
f: 100.0
s: 0.0
returns_id: false
supports_batch_generation: true
supports_constraints: true
supports_discrete_variables: true
supports_multi_objective: true
turbo_controller: null
use_cuda: false
use_pf_as_initial_points: false
vocs:
constants: {}
constraints: {}
objectives:
f:
dtype: null
type: MinimizeObjective
s:
dtype: null
type: MaximizeObjective
observables: {}
variables:
s:
default_value: null
domain:
- 0.0
- 1.0
dtype: null
type: ContinuousVariable
x:
default_value: null
domain:
- 0.0
- 6.283185307179586
dtype: null
type: ContinuousVariable
serialize_inline: false
serialize_torch: false
stopping_condition: null
strict: true
# seed the optimization with three hand-picked points at mixed fidelities
X.evaluate_data(
    pd.DataFrame(
        {
            "x": [math.pi / 4, math.pi / 2.0, math.pi],
            "s": [0.0, 0.25, 0.0],
        }
    )
)
| x | s | f | xopt_runtime | xopt_error | |
|---|---|---|---|---|---|
| 0 | 0.785398 | 0.00 | 1.610902 | 0.000010 | False |
| 1 | 1.570796 | 0.25 | 1.064601 | 0.000004 | False |
| 2 | 3.141593 | 0.00 | -1.387351 | 0.000002 | False |
# get the total cost of previous observations based on the cost function
# (for the three seed points with s = 0, 0.25, 0 and cost s + 0.001:
#  0.001 + 0.251 + 0.001 = 0.253)
X.generator.calculate_total_cost()
tensor(0.2530)
# run optimization until the cost budget is exhausted
# we stop one unit short of the budget because a single evaluation can cost
# up to about 1 (s <= 1), so the final step should not overshoot the budget
budget = 10
while X.generator.calculate_total_cost() < budget - 1:
    X.step()
    # report progress after every step
    print(
        f"n_samples: {len(X.data)} "
        f"budget used: {X.generator.calculate_total_cost():.4} "
        f"hypervolume: {X.generator.get_pareto_front_and_hypervolume()[-1]:.4}"
    )
n_samples: 4 budget used: 0.5792 hypervolume: 32.95
n_samples: 5 budget used: 1.032 hypervolume: 45.75
n_samples: 6 budget used: 1.697 hypervolume: 66.86
n_samples: 7 budget used: 2.686 hypervolume: 99.34
n_samples: 8 budget used: 3.687 hypervolume: 100.6
n_samples: 9 budget used: 4.688 hypervolume: 101.1
n_samples: 10 budget used: 5.076 hypervolume: 101.1
n_samples: 11 budget used: 5.223 hypervolume: 101.1
n_samples: 12 budget used: 5.853 hypervolume: 101.2
n_samples: 13 budget used: 6.652 hypervolume: 101.2
n_samples: 14 budget used: 6.919 hypervolume: 101.2
n_samples: 15 budget used: 7.824 hypervolume: 101.2
n_samples: 16 budget used: 8.275 hypervolume: 101.2
n_samples: 17 budget used: 8.626 hypervolume: 101.2
n_samples: 18 budget used: 9.158 hypervolume: 101.3
X.data
| x | s | f | xopt_runtime | xopt_error | |
|---|---|---|---|---|---|
| 0 | 0.785398 | 0.000000 | 1.610902e+00 | 0.000010 | False |
| 1 | 1.570796 | 0.250000 | 1.064601e+00 | 0.000004 | False |
| 2 | 3.141593 | 0.000000 | -1.387351e+00 | 0.000002 | False |
| 3 | 3.731996 | 0.325158 | -1.336422e+00 | 0.000010 | False |
| 4 | 3.463365 | 0.451840 | -1.005280e+00 | 0.000011 | False |
| 5 | 0.000000 | 0.663812 | 3.902770e-01 | 0.000011 | False |
| 6 | 0.000000 | 0.987857 | 1.221650e-02 | 0.000010 | False |
| 7 | 6.283185 | 1.000000 | -2.449294e-16 | 0.000010 | False |
| 8 | 4.428499 | 1.000000 | -9.599732e-01 | 0.000011 | False |
| 9 | 4.527358 | 0.387453 | -1.236100e+00 | 0.000011 | False |
| 10 | 3.932443 | 0.146186 | -1.528331e+00 | 0.000012 | False |
| 11 | 4.334340 | 0.629174 | -1.203684e+00 | 0.000010 | False |
| 12 | 4.425679 | 0.797966 | -1.102332e+00 | 0.000011 | False |
| 13 | 3.980567 | 0.265980 | -1.443409e+00 | 0.000013 | False |
| 14 | 4.514431 | 0.903244 | -1.044197e+00 | 0.000012 | False |
| 15 | 4.166937 | 0.450695 | -1.316064e+00 | 0.000011 | False |
| 16 | 4.064967 | 0.349388 | -1.384447e+00 | 0.000012 | False |
| 17 | 4.244276 | 0.530787 | -1.264410e+00 | 0.000011 | False |
Plot the model prediction and acquisition function inside the optimization space¶
fig, ax = X.generator.visualize_model()
Plot the Pareto front¶
# NOTE: this plots every evaluated point (objective f vs. fidelity s),
# connected in evaluation order, not only the non-dominated points
X.data.plot(x="f", y="s", style="o-")
<Axes: xlabel='f'>
X.data
| x | s | f | xopt_runtime | xopt_error | |
|---|---|---|---|---|---|
| 0 | 0.785398 | 0.000000 | 1.610902e+00 | 0.000010 | False |
| 1 | 1.570796 | 0.250000 | 1.064601e+00 | 0.000004 | False |
| 2 | 3.141593 | 0.000000 | -1.387351e+00 | 0.000002 | False |
| 3 | 3.731996 | 0.325158 | -1.336422e+00 | 0.000010 | False |
| 4 | 3.463365 | 0.451840 | -1.005280e+00 | 0.000011 | False |
| 5 | 0.000000 | 0.663812 | 3.902770e-01 | 0.000011 | False |
| 6 | 0.000000 | 0.987857 | 1.221650e-02 | 0.000010 | False |
| 7 | 6.283185 | 1.000000 | -2.449294e-16 | 0.000010 | False |
| 8 | 4.428499 | 1.000000 | -9.599732e-01 | 0.000011 | False |
| 9 | 4.527358 | 0.387453 | -1.236100e+00 | 0.000011 | False |
| 10 | 3.932443 | 0.146186 | -1.528331e+00 | 0.000012 | False |
| 11 | 4.334340 | 0.629174 | -1.203684e+00 | 0.000010 | False |
| 12 | 4.425679 | 0.797966 | -1.102332e+00 | 0.000011 | False |
| 13 | 3.980567 | 0.265980 | -1.443409e+00 | 0.000013 | False |
| 14 | 4.514431 | 0.903244 | -1.044197e+00 | 0.000012 | False |
| 15 | 4.166937 | 0.450695 | -1.316064e+00 | 0.000011 | False |
| 16 | 4.064967 | 0.349388 | -1.384447e+00 | 0.000012 | False |
| 17 | 4.244276 | 0.530787 | -1.264410e+00 | 0.000011 | False |
# get optimal value at max fidelity; the objective is minimized, and the true
# minimizer at s = 1 (where f = sin(x)) is x = 3*pi/2, approx. 4.71
X.generator.get_optimum().to_dict()
{'x': {0: 4.614557548513697}, 's': {0: 1.0}}