Time-dependent Bayesian Optimization¶
In this example we demonstrate time-dependent optimization. Here we are not only interested in finding an optimum point in input space, but also in tracking that optimum as it drifts over time.
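The TDUpperConfidenceBoundGenerator used in this example builds a GP model jointly over the input x and wall-clock time t, and optimizes an upper-confidence-bound acquisition function evaluated at a target time slightly in the future. As a rough sketch of the idea (not the exact library implementation), writing the posterior mean and standard deviation as $\mu$ and $\sigma$:
$$\alpha_{t^*}(x) = \mu(x, t^*) + \sqrt{\beta}\,\sigma(x, t^*), \qquad t^* \approx t_{\mathrm{now}} + \text{added\_time},$$
where added_time allows for the time needed to compute and apply the next point (sign conventions for minimization objectives are handled internally by Xopt).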
# set values if testing
import os
import time
import warnings
import torch
from matplotlib import pyplot as plt
from tqdm import trange
from xopt.generators.bayesian import TDUpperConfidenceBoundGenerator
from xopt.vocs import VOCS
from xopt.evaluator import Evaluator
from xopt import Xopt
SMOKE_TEST = os.environ.get("SMOKE_TEST")
N_MC_SAMPLES = 1 if SMOKE_TEST else 128
NUM_RESTARTS = 1 if SMOKE_TEST else 20
N_STEPS = 1 if SMOKE_TEST else 250
warnings.filterwarnings("ignore")
Time-dependent test problem¶
Optimization is carried out over a single variable x. The test function is a simple
quadratic whose minimum location drifts as a function of time t.
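Concretely, the drifting minimum location $k(t)$ and the objective $g(x, t)$ defined below are
$$k(t) = \begin{cases} 0.25\,\sin(0.6\,t) + 10^{-3}\,t, & t < 50,\\ -1.5\times 10^{-2}\,(t - 50), & t \ge 50, \end{cases} \qquad g(x, t) = \big(x - k(t)\big)^2.$$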
Define test functions
# location of time dependent minimum
def k(t_):
return torch.where(
t_ < 50, 0.25 * torch.sin(t_ * 6 / 10.0) + 0.1e-2 * t_, -1.5e-2 * (t_ - 50.0)
)
# define function in time and position space
def g(x_, t_):
return (x_ - k(t_)) ** 2
# create callable function for Xopt
def f(inputs):
x_ = inputs["x"]
current_time = time.time()
t_ = current_time - start_time
y_ = g(x_, torch.tensor(t_))
return {"y": float(y_), "time": float(current_time)}
Define Xopt objects including optimization algorithm¶
variables = {"x": [-1, 1]}
objectives = {"y": "MINIMIZE"}
vocs = VOCS(variables=variables, objectives=objectives)
evaluator = Evaluator(function=f)
Run optimization¶
generator = TDUpperConfidenceBoundGenerator(
vocs=vocs,
beta=0.01,
added_time=0.1,
forgetting_time=10.0,
)
generator.n_monte_carlo_samples = N_MC_SAMPLES
generator.numerical_optimizer.n_restarts = NUM_RESTARTS
generator.max_travel_distances = [0.1]
generator.gp_constructor.use_low_noise_prior = True
start_time = time.time()
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)
X.random_evaluate(2)
for _ in trange(N_STEPS):
# note that in this example we can ignore warnings if computation
# time is greater than added time
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
X.step()
time.sleep(0.1)
Visualize GP model of objective function and plot trajectory¶
data = X.data
xbounds = generator.vocs.bounds
tbounds = [data["time"].min(), data["time"].max()]
model = X.generator.model
n = 100
t = torch.linspace(*tbounds, n, dtype=torch.double)
x = torch.linspace(*xbounds.flatten(), n, dtype=torch.double)
tt, xx = torch.meshgrid(t, x)
pts = torch.hstack([ele.reshape(-1, 1) for ele in (tt, xx)]).double()
tt, xx = tt.numpy(), xx.numpy()
# NOTE: the model inputs are such that t is the last dimension
gp_pts = torch.flip(pts, dims=[-1])
gt_vals = g(gp_pts.T[0], gp_pts.T[1] - start_time)
with torch.no_grad():
post = model.posterior(gp_pts)
mean = post.mean
std = torch.sqrt(post.variance)
fig, ax = plt.subplots()
ax.set_title("model mean")
ax.set_xlabel("unix time")
ax.set_ylabel("x")
c = ax.pcolor(tt, xx, mean.reshape(n, n), rasterized=True)
ax.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1", label="samples")
ax.plot(t, k(t - start_time), "C3--", label="ideal path", zorder=10)
ax.legend()
fig.colorbar(c)
fig2, ax2 = plt.subplots()
ax2.set_title("model uncertainty")
ax2.set_xlabel("unix time")
ax2.set_ylabel("x")
c = ax2.pcolor(tt, xx, std.reshape(n, n))
fig2.colorbar(c)
fig3, ax3 = plt.subplots()
ax3.set_title("ground truth value")
ax3.set_xlabel("unix time")
ax3.set_ylabel("x")
c = ax3.pcolor(tt, xx, gt_vals.reshape(n, n))
fig3.colorbar(c)
ax2.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")
ax3.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")
Plot the acquisition function¶
# note that target time is only updated during the generate call
target_time = X.generator.target_prediction_time
print(target_time - start_time)
my_acq_func = X.generator.get_acquisition(model)
with torch.no_grad():
acq_pts = x.unsqueeze(-1).unsqueeze(-1)
full_acq = my_acq_func.acq_func(gp_pts.unsqueeze(1))
fixed_acq = my_acq_func(acq_pts)
fig, ax = plt.subplots()
c = ax.pcolor(tt, xx, full_acq.reshape(n, n))
ax.set_xlabel("unix time")
ax.set_ylabel("x")
ax.set_title("acquisition function")
fig.colorbar(c)
fig2, ax2 = plt.subplots()
ax2.plot(x.flatten(), fixed_acq.flatten())
ax2.set_xlabel("x")
ax2.set_ylabel("acquisition function")
ax2.set_title("acquisition function at last time step")
0.1258993148803711
Run Time-Dependent BO with Model Caching¶
Instead of retraining the GP model hyperparameters at every step, we can hold
on to previously determined hyperparameters by setting
use_cached_hyperparameters=True on the GP model constructor. This reduces the time
needed to make decisions, leading to faster feedback when addressing time-critical
optimization tasks. However, it can come at the cost of model accuracy when the
target function changes behavior (a change in lengthscale, for example).
generator = TDUpperConfidenceBoundGenerator(
vocs=vocs,
beta=0.01,
added_time=0.1,
forgetting_time=20.0,
)
generator.n_monte_carlo_samples = N_MC_SAMPLES
generator.numerical_optimizer.n_restarts = NUM_RESTARTS
generator.max_travel_distances = [0.1]
start_time = time.time()
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)
X.random_evaluate(2)
for i in trange(N_STEPS):
# note that in this example we can ignore warnings if computation time is greater
# than added time
if i == 50:
X.generator.gp_constructor.use_cached_hyperparameters = True
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
X.step()
time.sleep(0.1)
# plot total computation time
ax = X.generator.computation_time.sum(axis=1).plot()
ax.set_xlabel("Iteration")
ax.set_ylabel("total BO computation time (s)")
data = X.data
xbounds = generator.vocs.bounds
tbounds = [data["time"].min(), data["time"].max()]
model = X.generator.model
n = 100
t = torch.linspace(*tbounds, n, dtype=torch.double)
x = torch.linspace(*xbounds.flatten(), n, dtype=torch.double)
tt, xx = torch.meshgrid(t, x)
pts = torch.hstack([ele.reshape(-1, 1) for ele in (tt, xx)]).double()
tt, xx = tt.numpy(), xx.numpy()
# NOTE: the model inputs are such that t is the last dimension
gp_pts = torch.flip(pts, dims=[-1])
gt_vals = g(gp_pts.T[0], gp_pts.T[1] - start_time)
with torch.no_grad():
post = model.posterior(gp_pts)
mean = post.mean
std = torch.sqrt(post.variance)
fig, ax = plt.subplots()
ax.set_title("model mean")
ax.set_xlabel("unix time")
ax.set_ylabel("x")
c = ax.pcolor(tt, xx, mean.reshape(n, n))
ax.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1", label="samples")
ax.plot(t, k(t - start_time), "C3--", label="ideal path", zorder=10)
ax.legend()
fig.colorbar(c)
fig2, ax2 = plt.subplots()
ax2.set_title("model uncertainty")
ax2.set_xlabel("unix time")
ax2.set_ylabel("x")
c = ax2.pcolor(tt, xx, std.reshape(n, n))
fig2.colorbar(c)
fig3, ax3 = plt.subplots()
ax3.set_title("ground truth value")
ax3.set_xlabel("unix time")
ax3.set_ylabel("x")
c = ax3.pcolor(tt, xx, gt_vals.reshape(n, n))
fig3.colorbar(c)
ax2.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")
ax3.plot(data["time"].to_numpy(), data["x"].to_numpy(), "oC1")