Multi-fidelity Multi-objective Bayesian Optimization¶
Here we attempt to solve for the constrained Pareto front of the TNK multi-objective optimization problem using Multi-Fidelity Multi-Objective Bayesian Optimization. For simplicity we assume that the objective and constraint functions at lower fidelities are exactly equal to the functions at higher fidelities (this is not a requirement, although for best results lower-fidelity calculations should correlate with higher-fidelity ones). The algorithm should learn this relationship and use information gathered at lower fidelities to guide sampling that improves the hypervolume of the Pareto front at the maximum fidelity.
The TNK test problem has $n = 2$ variables: $x_i \in [0, \pi], \; i = 1, 2$
Objectives:
- $f_i(x) = x_i$
Constraints:
- $g_1(x) = -x_1^2 -x_2^2 + 1 + 0.1 \cos\left(16 \arctan \frac{x_1}{x_2}\right) \le 0$
- $g_2(x) = (x_1 - 1/2)^2 + (x_2-1/2)^2 \le 0.5$
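Because we assume the objectives and constraints do not change with fidelity, an evaluation function for this tutorial only needs to accept (and can simply ignore) the fidelity input $s$. The sketch below illustrates this under that assumption; the function name and exact signature are hypothetical, and the real `evaluate_TNK` imported below has additional options.
import numpy as np


def evaluate_tnk_sketch(inputs: dict) -> dict:
    # hypothetical stand-in for xopt.resources.test_functions.tnk.evaluate_TNK;
    # the fidelity input "s" is deliberately ignored, so every fidelity returns
    # exactly the high-fidelity objective and constraint values
    x1, x2 = inputs["x1"], inputs["x2"]
    return {
        "y1": x1,  # objective f1 (minimize)
        "y2": x2,  # objective f2 (minimize)
        # c1 >= 0 is equivalent to g1 <= 0 above
        "c1": x1**2 + x2**2 - 1.0 - 0.1 * np.cos(16 * np.arctan2(x1, x2)),
        # c2 <= 0.5 is equivalent to g2
        "c2": (x1 - 0.5) ** 2 + (x2 - 0.5) ** 2,
    }


print(evaluate_tnk_sketch({"x1": 1.0, "x2": 0.75, "s": 0.1}))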
# set values if testing
import os
from copy import deepcopy
import pandas as pd
import numpy as np
import torch
from xopt import Xopt, Evaluator
from xopt.generators.bayesian import MultiFidelityGenerator
from xopt.resources.test_functions.tnk import evaluate_TNK, tnk_vocs
import matplotlib.pyplot as plt
# Ignore all warnings
import warnings
warnings.filterwarnings("ignore")
SMOKE_TEST = os.environ.get("SMOKE_TEST")
N_MC_SAMPLES = 1 if SMOKE_TEST else 128
NUM_RESTARTS = 1 if SMOKE_TEST else 20
BUDGET = 0.02 if SMOKE_TEST else 10
evaluator = Evaluator(function=evaluate_TNK)
print(tnk_vocs.dict())
{'variables': {'x1': (0.0, 3.14159), 'x2': (0.0, 3.14159)}, 'constraints': {'c1': ('GREATER_THAN', 0.0), 'c2': ('LESS_THAN', 0.5)}, 'objectives': {'y1': 'MINIMIZE', 'y2': 'MINIMIZE'}, 'constants': {'a': 'dummy_constant'}, 'observables': []}
Set up the Multi-Fidelity Multi-objective optimization algorithm¶
Here we create the Multi-Fidelity generator object, which can solve both single- and multi-objective optimization problems depending on the number of objectives in VOCS. For this example we drop the constraints from the VOCS. We specify a cost function of the fidelity parameter $s \in [0, 1]$, $C(s) = s^{3.5}$, taken from a real-life multi-fidelity simulation problem (the laser plasma accelerator scaling referenced in the code below). Note that the generator automatically adds $s$ to the VOCS as a variable on $[0, 1]$ and as an objective to be maximized, as can be seen in the configuration dump below.
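As a quick illustration of this cost model (a sketch, not part of the tutorial's code; the cell below also adds a small offset of 0.001, presumably so that even $s = 0$ evaluations have nonzero cost), evaluating $C(s)$ at a few fidelities shows how steeply cost grows with fidelity:
def cost(s):
    # same model as generator.cost_function set below
    return s**3.5 + 0.001


for s in [0.0, 0.1, 0.5, 1.0]:
    print(f"s = {s:.1f}: cost = {cost(s):.4f}")
# a single full-fidelity sample (s = 1) costs about as much as
# a thousand zero-fidelity samples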
my_vocs = deepcopy(tnk_vocs)
my_vocs.constraints = {}
generator = MultiFidelityGenerator(vocs=my_vocs, reference_point={"y1": 1.5, "y2": 1.5})
# set cost function according to approximate scaling of laser plasma accelerator
# problem, see https://journals.aps.org/prresearch/abstract/10.1103/PhysRevResearch.5.013063
generator.cost_function = lambda s: s**3.5 + 0.001
generator.numerical_optimizer.n_restarts = NUM_RESTARTS
generator.n_monte_carlo_samples = N_MC_SAMPLES
generator.gp_constructor.use_low_noise_prior = True
X = Xopt(generator=generator, evaluator=evaluator, vocs=my_vocs)
# evaluate at some explicit initial points
X.evaluate_data(pd.DataFrame({"x1": [1.0, 0.75], "x2": [0.75, 1.0], "s": [0.0, 0.1]}))
X
Xopt
________________________________
Version: 2.6.11.dev7+g4d3d82eb9.d20260128
Data size: 2
Config as YAML:
dump_file: null
evaluator:
  function: xopt.resources.test_functions.tnk.evaluate_TNK
  function_kwargs:
    raise_probability: 0
    random_sleep: 0
    sleep: 0
  max_workers: 1
  vectorized: false
generator:
  computation_time: null
  custom_objective: null
  fixed_features: null
  gp_constructor:
    covar_modules: {}
    custom_noise_prior: null
    mean_modules: {}
    name: standard
    trainable_mean_keys: []
    transform_inputs: true
    use_cached_hyperparameters: false
    use_low_noise_prior: true
  max_travel_distances: null
  model: null
  n_candidates: 1
  n_interpolate_points: null
  n_monte_carlo_samples: 128
  name: multi_fidelity
  numerical_optimizer:
    max_iter: 2000
    max_time: 5.0
    n_restarts: 20
    name: LBFGS
  reference_point:
    s: 0.0
    y1: 1.5
    y2: 1.5
  supports_batch_generation: true
  supports_constraints: true
  supports_multi_objective: true
  turbo_controller: null
  use_cuda: false
  use_pf_as_initial_points: false
serialize_inline: false
serialize_torch: false
stopping_condition: null
strict: true
vocs:
  constants:
    a: dummy_constant
  constraints: {}
  objectives:
    s: MAXIMIZE
    y1: MINIMIZE
    y2: MINIMIZE
  observables: []
  variables:
    s:
    - 0
    - 1
    x1:
    - 0.0
    - 3.14159
    x2:
    - 0.0
    - 3.14159
Run optimization routine¶
Instead of ending the optimization routine after an explicit number of samples, we end optimization once a given optimization budget has been exceeded. WARNING: This will slightly exceed the given budget, since the budget is only checked before each step and the final step can push the total cost past it.
budget = BUDGET
while X.generator.calculate_total_cost() < budget:
    X.step()
    print(
        f"n_samples: {len(X.data)} "
        f"budget used: {X.generator.calculate_total_cost():.4} "
        f"hypervolume: {X.generator.get_pareto_front_and_hypervolume()[-1]:.4}"
    )
n_samples: 3 budget used: 0.003317 hypervolume: 0.03837
n_samples: 4 budget used: 0.004663 hypervolume: 0.05218
n_samples: 5 budget used: 0.006222 hypervolume: 0.05218
n_samples: 6 budget used: 0.007977 hypervolume: 0.2056
n_samples: 7 budget used: 0.01215 hypervolume: 0.4315
n_samples: 8 budget used: 0.02442 hypervolume: 0.6245
n_samples: 9 budget used: 0.05181 hypervolume: 0.7965
n_samples: 10 budget used: 0.1127 hypervolume: 1.006
n_samples: 11 budget used: 0.2478 hypervolume: 1.267
n_samples: 12 budget used: 0.5498 hypervolume: 1.597
n_samples: 13 budget used: 1.217 hypervolume: 2.003
n_samples: 14 budget used: 2.218 hypervolume: 2.25
n_samples: 15 budget used: 3.219 hypervolume: 2.25
n_samples: 16 budget used: 4.22 hypervolume: 2.25
n_samples: 17 budget used: 4.589 hypervolume: 2.25
n_samples: 18 budget used: 4.59 hypervolume: 2.25
n_samples: 19 budget used: 5.591 hypervolume: 2.25
n_samples: 20 budget used: 5.844 hypervolume: 2.25
n_samples: 21 budget used: 6.835 hypervolume: 2.25
n_samples: 22 budget used: 6.836 hypervolume: 2.25
n_samples: 23 budget used: 6.966 hypervolume: 2.25
n_samples: 24 budget used: 6.968 hypervolume: 2.25
n_samples: 25 budget used: 7.969 hypervolume: 2.25
n_samples: 26 budget used: 7.97 hypervolume: 2.25
n_samples: 27 budget used: 7.971 hypervolume: 2.25
n_samples: 28 budget used: 8.331 hypervolume: 2.25
n_samples: 29 budget used: 8.67 hypervolume: 2.25
n_samples: 30 budget used: 8.671 hypervolume: 2.25
n_samples: 31 budget used: 8.672 hypervolume: 2.25
n_samples: 32 budget used: 8.741 hypervolume: 2.25
n_samples: 33 budget used: 9.651 hypervolume: 2.25
n_samples: 34 budget used: 9.689 hypervolume: 2.25
n_samples: 35 budget used: 9.733 hypervolume: 2.25
n_samples: 36 budget used: 10.2 hypervolume: 2.25
Show results¶
X.data
| | x1 | x2 | s | a | y1 | y2 | c1 | c2 | xopt_runtime | xopt_error |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.000000 | 0.750000 | 0.000000 | dummy_constant | 1.000000 | 0.750000 | 0.626888 | 0.312500 | 0.000165 | False |
| 1 | 0.750000 | 1.000000 | 0.100000 | dummy_constant | 0.750000 | 1.000000 | 0.626888 | 0.312500 | 0.000132 | False |
| 2 | 0.542355 | 1.210821 | 0.014460 | dummy_constant | 0.542355 | 1.210821 | 0.670403 | 0.507061 | 0.000153 | False |
| 3 | 1.162506 | 0.613711 | 0.102600 | dummy_constant | 1.162506 | 0.613711 | 0.719845 | 0.451844 | 0.000161 | False |
| 4 | 2.771510 | 0.778743 | 0.117692 | dummy_constant | 2.771510 | 0.778743 | 7.320086 | 5.237458 | 0.000163 | False |
| 5 | 0.000000 | 0.430829 | 0.128227 | dummy_constant | 0.000000 | 0.430829 | -0.914386 | 0.254785 | 0.000158 | False |
| 6 | 0.000000 | 0.011869 | 0.193328 | dummy_constant | 0.000000 | 0.011869 | -1.099859 | 0.488272 | 0.000153 | False |
| 7 | 0.000000 | 0.000000 | 0.277546 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000155 | False |
| 8 | 0.000000 | 0.000000 | 0.354010 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000161 | False |
| 9 | 0.000000 | 0.000000 | 0.447325 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000155 | False |
| 10 | 0.000000 | 0.000000 | 0.563317 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000155 | False |
| 11 | 0.000000 | 0.000000 | 0.709606 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000152 | False |
| 12 | 0.000000 | 0.000000 | 0.890413 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000153 | False |
| 13 | 0.000000 | 0.000000 | 1.000000 | dummy_constant | 0.000000 | 0.000000 | -1.100000 | 0.500000 | 0.000154 | False |
| 14 | 0.000000 | 3.141590 | 1.000000 | dummy_constant | 0.000000 | 3.141590 | 8.769588 | 7.227998 | 0.000111 | False |
| 15 | 0.000000 | 0.784422 | 1.000000 | dummy_constant | 0.000000 | 0.784422 | -0.484683 | 0.330896 | 0.000147 | False |
| 16 | 0.371654 | 0.235969 | 0.751672 | dummy_constant | 0.371654 | 0.235969 | -0.713090 | 0.086185 | 0.000152 | False |
| 17 | 1.618485 | 2.450725 | 0.070894 | dummy_constant | 1.618485 | 2.450725 | 7.725176 | 5.056338 | 0.000156 | False |
| 18 | 0.752904 | 0.000000 | 1.000000 | dummy_constant | 0.752904 | 0.000000 | -0.533135 | 0.313960 | 0.000152 | False |
| 19 | 2.518274 | 2.319278 | 0.673908 | dummy_constant | 2.518274 | 2.319278 | 10.641620 | 7.383202 | 0.000149 | False |
| 20 | 1.860841 | 2.575078 | 0.997181 | dummy_constant | 1.860841 | 2.575078 | 9.177000 | 6.157838 | 0.000153 | False |
| 21 | 0.009706 | 1.513285 | 0.042260 | dummy_constant | 0.009706 | 1.513285 | 1.190650 | 1.267133 | 0.000155 | False |
| 22 | 0.711838 | 0.761477 | 0.557060 | dummy_constant | 0.711838 | 0.761477 | 0.000731 | 0.113245 | 0.000167 | False |
| 23 | 1.581468 | 2.562870 | 0.131123 | dummy_constant | 1.581468 | 2.562870 | 8.153059 | 5.425007 | 0.000151 | False |
| 24 | 0.837776 | 0.806534 | 1.000000 | dummy_constant | 0.837776 | 0.806534 | 0.256950 | 0.208056 | 0.000153 | False |
| 25 | 1.076706 | 0.040916 | 0.047952 | dummy_constant | 1.076706 | 0.040916 | 0.078874 | 0.543347 | 0.000154 | False |
| 26 | 1.891797 | 2.420450 | 0.027005 | dummy_constant | 1.891797 | 2.420450 | 8.474653 | 5.625226 | 0.000153 | False |
| 27 | 0.799560 | 0.536612 | 0.746366 | dummy_constant | 0.799560 | 0.536612 | 0.027196 | 0.091077 | 0.000153 | False |
| 28 | 1.096468 | 0.039266 | 0.733559 | dummy_constant | 1.096468 | 0.039266 | 0.119741 | 0.568050 | 0.000145 | False |
| 29 | 0.920367 | 1.970652 | 0.100809 | dummy_constant | 0.920367 | 1.970652 | 3.654560 | 2.339526 | 0.000152 | False |
| 30 | 0.316325 | 2.686170 | 0.057810 | dummy_constant | 0.316325 | 2.686170 | 6.345575 | 4.813076 | 0.000152 | False |
| 31 | 1.213724 | 2.448158 | 0.462814 | dummy_constant | 1.213724 | 2.448158 | 6.419554 | 4.304722 | 0.000151 | False |
| 32 | 2.698558 | 1.048141 | 0.973145 | dummy_constant | 2.698558 | 1.048141 | 7.287069 | 5.134114 | 0.000138 | False |
| 33 | 1.166590 | 2.433034 | 0.391553 | dummy_constant | 1.166590 | 2.433034 | 6.216132 | 4.180962 | 0.000163 | False |
| 34 | 0.768286 | 1.319676 | 0.406759 | dummy_constant | 0.768286 | 1.319676 | 1.386728 | 0.743846 | 0.000154 | False |
| 35 | 2.213834 | 2.168245 | 0.802241 | dummy_constant | 2.213834 | 2.168245 | 8.503728 | 5.720267 | 0.000150 | False |
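Since the goal is a good Pareto front at maximum fidelity, it can be useful to pull out the feasible samples evaluated at high fidelity. Below is a short sketch using `feasibility_data` (the same helper used in the plotting code that follows); the 0.9 fidelity cutoff is an arbitrary illustrative choice.
# sketch: feasible samples evaluated at (near) maximum fidelity;
# the 0.9 cutoff is an arbitrary illustrative threshold
feasible = tnk_vocs.feasibility_data(X.data)["feasible"]
high_fidelity = X.data["s"] > 0.9
print(X.data[feasible & high_fidelity][["x1", "x2", "s", "y1", "y2"]])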
Plot results¶
Here we plot the resulting observations in input space, colored by feasibility (neglecting the fact that these data points are at varying fidelities).
fig, ax = plt.subplots()
theta = np.linspace(0, np.pi / 2)
r = np.sqrt(1 + 0.1 * np.cos(16 * theta))
x_1 = r * np.sin(theta)
x_2_lower = r * np.cos(theta)
x_2_upper = (0.5 - (x_1 - 0.5) ** 2) ** 0.5 + 0.5
z = np.zeros_like(x_1)

# white out the region where the c1 constraint is violated
ax.fill_between(x_1, z, x_2_lower, fc="white")
circle = plt.Circle(
    (0.5, 0.5), 0.5**0.5, color="r", alpha=0.25, zorder=0, label="Valid Region"
)
ax.add_patch(circle)

# append feasibility information to the evaluated data
history = pd.concat(
    [X.data, tnk_vocs.feasibility_data(X.data)], axis=1, ignore_index=False
)
ax.plot(*history[["x1", "x2"]][history["feasible"]].to_numpy().T, ".C1")
ax.plot(*history[["x1", "x2"]][~history["feasible"]].to_numpy().T, ".C2")
ax.set_xlim(0, 3.14)
ax.set_ylim(0, 3.14)
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.set_aspect("equal")
Plot path through input space¶
ax = history.hist(["x1", "x2", "s"], bins=20)
history.plot(y=["x1", "x2", "s"])
<Axes: >
Plot the acquisition function¶
Here we plot the acquisition function at a small set of fidelities, $s \in \{0, 0.5, 1.0\}$.
# plot the acquisition function
bounds = X.generator.vocs.bounds
model = X.generator.model

# create mesh over non-fidelity parameters
n = 50
x = torch.linspace(*bounds.T[1], n)
y = torch.linspace(*bounds.T[2], n)
xx, yy = torch.meshgrid(x, y)

# plot function(s) at a single fidelity parameter
fidelities = [0.0, 0.5, 1.0]
for fidelity in fidelities:
    # evaluation points: fixed fidelity column prepended to the (x1, x2) mesh
    pts = torch.hstack([ele.reshape(-1, 1) for ele in (xx, yy)]).double()
    pts = torch.cat((torch.ones(pts.shape[0], 1) * fidelity, pts), dim=-1)

    acq_func = X.generator.get_acquisition(model)
    with torch.no_grad():
        acq_pts = pts.unsqueeze(1)
        acq = acq_func(acq_pts)

    fig, ax = plt.subplots()
    xxn, yyn = xx.numpy(), yy.numpy()
    c = ax.pcolor(xxn, yyn, acq.reshape(n, n), cmap="Blues")
    fig.colorbar(c)
    ax.set_title(f"Acquisition function - s: {fidelity}")

    ax.plot(*history[["x1", "x2"]][history["feasible"]].to_numpy().T, ".C1")
    ax.plot(*history[["x1", "x2"]][~history["feasible"]].to_numpy().T, ".C2")
    ax.plot(*history[["x1", "x2"]].to_numpy()[-1].T, "+")

candidate = pd.DataFrame(X.generator.generate(1), index=[0])
print(candidate[["x1", "x2"]].to_numpy())
ax.plot(*candidate[["x1", "x2"]].to_numpy()[0], "o")
[[1.38409713 0.7480633 ]]
[<matplotlib.lines.Line2D at 0x7f07dc0a8b90>]
# examine lengthscale of the first objective
list(model.models[0].named_parameters())
[('likelihood.noise_covar.raw_noise',
Parameter containing:
tensor([-55.1573], dtype=torch.float64, requires_grad=True)),
('mean_module.raw_constant',
Parameter containing:
tensor(0.0816, dtype=torch.float64, requires_grad=True)),
('covar_module.raw_lengthscale',
Parameter containing:
tensor([[ 0.7557, 44.5003, 51.6150]], dtype=torch.float64, requires_grad=True))]
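The `raw_` parameters above are unconstrained values; GPyTorch transforms them through positivity constraints to obtain the actual hyperparameters. Assuming the kernel exposes GPyTorch's standard `lengthscale` property, the constrained lengthscales can be read directly:
# sketch: read the constrained (positive) lengthscales; `lengthscale` is
# GPyTorch's standard property that applies the raw-parameter transform
print(model.models[0].covar_module.lengthscale)
# inputs are ordered (s, x1, x2), as in the acquisition plot above; a very
# large lengthscale along a dimension means the GP treats this output as
# nearly constant in that input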