Time-dependent Bayesian Optimization¶
In this example we demonstrate time-dependent optimization. Here we are not only interested in finding an optimum point in input space, but also in tracking that optimum as it drifts over time.
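The TDUpperConfidenceBoundGenerator used in this example builds a GP model jointly over the input x and wall-clock time t, and optimizes an upper-confidence-bound acquisition function evaluated at a target time slightly in the future. As a rough sketch of the idea (not the exact library implementation), writing the posterior mean and standard deviation as $\mu$ and $\sigma$:
$$\alpha_{t^*}(x) = \mu(x, t^*) + \sqrt{\beta}\,\sigma(x, t^*), \qquad t^* \approx t_{\mathrm{now}} + \text{added\_time},$$
where added_time allows for the time needed to compute and apply the next point (sign conventions for minimization objectives are handled internally by Xopt).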
# set values if testing
import os
import time
import warnings
import torch
from matplotlib import pyplot as plt
from tqdm import trange
from xopt.generators.bayesian import TDUpperConfidenceBoundGenerator
from xopt.vocs import VOCS
from xopt.evaluator import Evaluator
from xopt import Xopt
SMOKE_TEST = os.environ.get("SMOKE_TEST")
N_MC_SAMPLES = 1 if SMOKE_TEST else 128
NUM_RESTARTS = 1 if SMOKE_TEST else 20
N_STEPS = 1 if SMOKE_TEST else 250
warnings.filterwarnings("ignore")
Time-dependent test problem¶
Optimization is carried out over a single variable x. The test function is a simple
quadratic whose minimum location drifts as a function of time t.
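Concretely, the drifting minimum location $k(t)$ and the objective $g(x, t)$ defined below are
$$k(t) = \begin{cases} 0.25\,\sin(0.6\,t) + 10^{-3}\,t, & t < 50,\\ -1.5\times 10^{-2}\,(t - 50), & t \ge 50, \end{cases} \qquad g(x, t) = \big(x - k(t)\big)^2.$$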
Define test functions
# location of time dependent minimum
def k(t_):
return torch.where(
t_ < 50, 0.25 * torch.sin(t_ * 6 / 10.0) + 0.1e-2 * t_, -1.5e-2 * (t_ - 50.0)
)
# define function in time and position space
def g(x_, t_):
return (x_ - k(t_)) ** 2
# create callable function for Xopt
def f(inputs):
x_ = inputs["x"]
current_time = time.time()
t_ = current_time - start_time
y_ = g(x_, torch.tensor(t_))
return {"y": float(y_), "time": float(current_time)}
Define Xopt objects including optimization algorithm¶
variables = {"x": [-1, 1]}
objectives = {"y": "MINIMIZE"}
vocs = VOCS(variables=variables, objectives=objectives)
evaluator = Evaluator(function=f)
Run optimization¶
generator = TDUpperConfidenceBoundGenerator(
vocs=vocs,
beta=0.01,
added_time=0.1,
forgetting_time=10.0,
)
generator.n_monte_carlo_samples = N_MC_SAMPLES
generator.numerical_optimizer.n_restarts = NUM_RESTARTS
generator.max_travel_distances = [0.1]
generator.gp_constructor.use_low_noise_prior = True
start_time = time.time()
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)
X.random_evaluate(2)
for _ in trange(N_STEPS):
# note that in this example we can ignore warnings if computation
# time is greater than added time
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
X.step()
time.sleep(0.1)
Visualize GP model of objective function and plot trajectory¶
data = X.data
xbounds = generator.vocs.bounds
tbounds = [data["time"].min(), data["time"].max()]
model = X.generator.model
n = 100
t = torch.linspace(*tbounds, n, dtype=torch.double)
x = torch.linspace(*xbounds.flatten(), n, dtype=torch.double)
tt, xx = torch.meshgrid(t, x)
pts = torch.hstack([ele.reshape(-1, 1) for ele in (tt, xx)]).double()
tt, xx = tt.numpy(), xx.numpy()
# NOTE: the model inputs are such that t is the last dimension
gp_pts = torch.flip(pts, dims=[-1])
gt_vals = g(gp_pts.T[0], gp_pts.T[1] - start_time)
with torch.no_grad():
post = model.posterior(gp_pts)
mean = post.mean
std = torch.sqrt(post.variance)
fig, ax = plt.subplots()
ax.set_title("model mean")
ax.set_xlabel("unix time")
ax.set_ylabel("x")
c = ax.pcolor(tt, xx, mean.reshape(n, n), rasterized=True)
ax.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1", label="samples")
ax.plot(t, k(t - start_time), "C3--", label="ideal path", zorder=10)
ax.legend()
fig.colorbar(c)
fig2, ax2 = plt.subplots()
ax2.set_title("model uncertainty")
ax2.set_xlabel("unix time")
ax2.set_ylabel("x")
c = ax2.pcolor(tt, xx, std.reshape(n, n))
fig2.colorbar(c)
fig3, ax3 = plt.subplots()
ax3.set_title("ground truth value")
ax3.set_xlabel("unix time")
ax3.set_ylabel("x")
c = ax3.pcolor(tt, xx, gt_vals.reshape(n, n))
fig3.colorbar(c)
ax2.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")
ax3.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")
Plot the acquisition function¶
# note that target time is only updated during the generate call
target_time = X.generator.target_prediction_time
print(target_time - start_time)
my_acq_func = X.generator.get_acquisition(model)
with torch.no_grad():
acq_pts = x.unsqueeze(-1).unsqueeze(-1)
full_acq = my_acq_func.acq_func(gp_pts.unsqueeze(1))
fixed_acq = my_acq_func(acq_pts)
fig, ax = plt.subplots()
c = ax.pcolor(tt, xx, full_acq.reshape(n, n))
ax.set_xlabel("unix time")
ax.set_ylabel("x")
ax.set_title("acquisition function")
fig.colorbar(c)
fig2, ax2 = plt.subplots()
ax2.plot(x.flatten(), fixed_acq.flatten())
ax2.set_xlabel("x")
ax2.set_ylabel("acquisition function")
ax2.set_title("acquisition function at last time step")
0.1258993148803711
Run Time-Dependent BO with Model Caching¶
Instead of retraining the GP model hyperparameters at every step, we can hold
on to previously determined hyperparameters by setting
use_cached_hyperparameters=True on the GP model constructor. This reduces the time
needed to make decisions, leading to faster feedback when addressing time-critical
optimization tasks. However, it can come at the cost of model accuracy when the
target function changes behavior (a change in lengthscale, for example).
generator = TDUpperConfidenceBoundGenerator(
vocs=vocs,
beta=0.01,
added_time=0.1,
forgetting_time=20.0,
)
generator.n_monte_carlo_samples = N_MC_SAMPLES
generator.numerical_optimizer.n_restarts = NUM_RESTARTS
generator.max_travel_distances = [0.1]
start_time = time.time()
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)
X.random_evaluate(2)
for i in trange(N_STEPS):
# note that in this example we can ignore warnings if computation time is greater
# than added time
if i == 50:
X.generator.gp_constructor.use_cached_hyperparameters = True
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
X.step()
time.sleep(0.1)
# plot total computation time
ax = X.generator.computation_time.sum(axis=1).plot()
ax.set_xlabel("Iteration")
ax.set_ylabel("total BO computation time (s)")
data = X.data
xbounds = generator.vocs.bounds
tbounds = [data["time"].min(), data["time"].max()]
model = X.generator.model
n = 100
t = torch.linspace(*tbounds, n, dtype=torch.double)
x = torch.linspace(*xbounds.flatten(), n, dtype=torch.double)
tt, xx = torch.meshgrid(t, x)
pts = torch.hstack([ele.reshape(-1, 1) for ele in (tt, xx)]).double()
tt, xx = tt.numpy(), xx.numpy()
# NOTE: the model inputs are such that t is the last dimension
gp_pts = torch.flip(pts, dims=[-1])
gt_vals = g(gp_pts.T[0], gp_pts.T[1] - start_time)
with torch.no_grad():
post = model.posterior(gp_pts)
mean = post.mean
std = torch.sqrt(post.variance)
fig, ax = plt.subplots()
ax.set_title("model mean")
ax.set_xlabel("unix time")
ax.set_ylabel("x")
c = ax.pcolor(tt, xx, mean.reshape(n, n))
ax.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1", label="samples")
ax.plot(t, k(t - start_time), "C3--", label="ideal path", zorder=10)
ax.legend()
fig.colorbar(c)
fig2, ax2 = plt.subplots()
ax2.set_title("model uncertainty")
ax2.set_xlabel("unix time")
ax2.set_ylabel("x")
c = ax2.pcolor(tt, xx, std.reshape(n, n))
fig2.colorbar(c)
fig3, ax3 = plt.subplots()
ax3.set_title("ground truth value")
ax3.set_xlabel("unix time")
ax3.set_ylabel("x")
c = ax3.pcolor(tt, xx, gt_vals.reshape(n, n))
fig3.colorbar(c)
ax2.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")
ax3.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")