Batched model tutorial
In this tutorial we demonstrate that, for problems involving more than one output (objectives and constraints) where every sample is complete (no NaNs in any output), a significant speedup can be achieved by using a batched model. On a GPU this typically yields a 2-3x speedup.
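To see why batching helps: instead of fitting a separate GP model per output (one for the objective and one per constraint), a batched model stacks all outputs into a single GP whose kernel and likelihood computations run as one vectorized batch. As a minimal sketch of the idea, using BoTorch directly rather than Xopt's internal implementation (the shapes below are illustrative):

import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.mlls import ExactMarginalLogLikelihood

train_X = torch.rand(32, 16, dtype=torch.double)  # 32 samples of 16 variables
train_Y = torch.randn(32, 4, dtype=torch.double)  # 4 outputs: objective + 3 constraints

# SingleTaskGP treats the 4 columns of train_Y as a batch of independent
# outputs, so hyperparameter training touches all of them in one vectorized call
batched_gp = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(batched_gp.likelihood, batched_gp)
fit_gpytorch_mll(mll)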
In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import pandas as pd
import threadpoolctl
import torch

from xopt import Xopt
from xopt.evaluator import Evaluator
from xopt.generators.bayesian import ExpectedImprovementGenerator
from xopt.generators.bayesian.models.standard import (
    BatchedModelConstructor,
    StandardModelConstructor,
)
from xopt.numerical_optimizer import LBFGSOptimizer
from xopt.resources.test_functions.rosenbrock import evaluate_rosenbrock

HAS_CUDA = False
step_size = 2
if torch.cuda.is_available():
    step_size = 20
    HAS_CUDA = True

# Limit CPU threads so the timing comparison is not skewed by BLAS/OpenMP parallelism
torch.set_num_threads(1)
threadpoolctl.threadpool_limits(limits=1, user_api="blas")
threadpoolctl.threadpool_limits(limits=1, user_api="openmp")

vocs = {
    "variables": {f"x{i}": [-3, 3] for i in range(16)},
    "objectives": {"y": "MINIMIZE"},
    "constraints": {
        "c1": ["GREATER_THAN", 0.1],
        "c2": ["LESS_THAN", 3],
        "c3": ["GREATER_THAN", 0],
    },
}


def eval_f(input_dict):
    rosenbrock = False  # set True to use the Rosenbrock test function instead
    d = {
        "y2": input_dict["x0"] + input_dict["x1"],
        "c1": input_dict["x2"] + input_dict["x3"],
        "c2": input_dict["x4"] + input_dict["x5"],
        "c3": input_dict["x0"],
    }
    if rosenbrock:
        d["y"] = evaluate_rosenbrock(input_dict)["y"]
    else:
        # noisy 16-dimensional sphere function
        d["y"] = (
            np.sum(np.array([input_dict[f"x{i}"] ** 2 for i in range(16)]))
            + np.random.randn() * 0.01
        )
    return d
In [2]:
evaluator = Evaluator(function=eval_f)

# standard setup: independent GP models, one per output
generator = ExpectedImprovementGenerator(
    vocs=vocs,
    gp_constructor=StandardModelConstructor(train_method="adam"),
    numerical_optimizer=LBFGSOptimizer(n_restarts=5),
    use_cuda=HAS_CUDA,
)
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)

# batched setup: all outputs share a single batched GP model
generator_batched = ExpectedImprovementGenerator(
    vocs=vocs,
    gp_constructor=BatchedModelConstructor(train_method="adam"),
    numerical_optimizer=LBFGSOptimizer(n_restarts=5),
    use_cuda=HAS_CUDA,
)
X2 = Xopt(evaluator=evaluator, generator=generator_batched, vocs=vocs)
In [3]:
# evaluate 20 random initial points with each optimizer
X.random_evaluate(20)
X2.random_evaluate(20)
Out[3]:
| | x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | ... | x13 | x14 | x15 | y2 | c1 | c2 | c3 | y | xopt_runtime | xopt_error |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.629771 | -1.529682 | 1.718415 | -2.260491 | 2.963790 | 0.975458 | -1.445952 | -1.403649 | 2.822854 | 1.834528 | ... | -0.990638 | 2.254874 | -1.928286 | -2.159452 | -0.542076 | 3.939248 | -0.629771 | 56.040175 | 0.000044 | False |
| 1 | -2.065522 | 2.471455 | -2.576183 | -1.701610 | 0.188843 | -0.663712 | -2.077991 | -0.792385 | -2.952899 | -1.510485 | ... | -0.305860 | 0.427607 | -2.680592 | 0.405933 | -4.277792 | -0.474869 | -2.065522 | 47.937440 | 0.000026 | False |
| 2 | -0.966346 | -1.981250 | 0.972131 | -0.902440 | -1.745697 | 0.852876 | 2.167262 | 0.387109 | -1.926389 | 2.889878 | ... | -2.152611 | -2.577891 | -0.904383 | -2.947596 | 0.069691 | -0.892820 | -0.966346 | 46.673252 | 0.000018 | False |
| 3 | 1.403526 | 1.400349 | -0.200041 | 0.193738 | -1.643334 | 2.947927 | 1.500362 | 1.998573 | -0.363381 | 0.164230 | ... | 0.739102 | 1.810546 | 0.947885 | 2.803875 | -0.006304 | 1.304593 | 1.403526 | 27.211103 | 0.000017 | False |
| 4 | 2.421217 | -2.170732 | 0.649781 | -1.647925 | 2.918195 | -1.251224 | -2.168905 | 0.601780 | -2.832474 | 0.369364 | ... | -1.578308 | 1.017236 | 0.813045 | 0.250485 | -0.998144 | 1.666971 | 2.421217 | 61.753942 | 0.000016 | False |
| 5 | -2.325302 | -2.134769 | -2.252290 | -1.614489 | -0.648646 | -2.390945 | 1.408011 | -2.685488 | -2.113125 | -0.526007 | ... | -1.639817 | 0.044242 | 1.898319 | -4.460072 | -3.866779 | -3.039591 | -2.325302 | 45.848722 | 0.000015 | False |
| 6 | -1.189527 | 2.476465 | 0.816846 | -1.376397 | -2.460270 | 1.745539 | 0.246297 | 0.996389 | -2.626013 | 0.262975 | ... | -1.336137 | -2.180613 | 1.898827 | 1.286939 | -0.559551 | -0.714731 | -1.189527 | 42.038339 | 0.000016 | False |
| 7 | 0.390989 | -1.539833 | 1.527494 | -0.042535 | 0.962704 | -0.517070 | -1.639343 | -1.876162 | -2.392006 | 2.224896 | ... | 1.313645 | 0.642401 | 1.207771 | -1.148844 | 1.484959 | 0.445635 | 0.390989 | 35.057813 | 0.000016 | False |
| 8 | -0.982868 | 1.230035 | -0.332712 | 1.284289 | -0.334786 | -0.561378 | 0.359927 | -2.900761 | 2.871355 | 2.500537 | ... | -2.490454 | -0.030497 | -1.536240 | 0.247167 | 0.951577 | -0.896163 | -0.982868 | 43.878148 | 0.000015 | False |
| 9 | 0.184043 | 2.731321 | 1.787637 | -2.625602 | 2.100862 | -0.808530 | 1.411866 | 0.390989 | -1.914743 | -0.694626 | ... | -0.883298 | -2.600023 | 0.645353 | 2.915364 | -0.837965 | 1.292332 | 0.184043 | 50.488743 | 0.000014 | False |
| 10 | -2.762480 | -0.013872 | 2.083811 | 1.816921 | 0.564897 | 1.745714 | -2.423665 | -2.735492 | -2.792730 | -0.233024 | ... | 0.144091 | -0.718396 | 2.051423 | -2.776352 | 3.900731 | 2.310611 | -2.762480 | 48.283709 | 0.000033 | False |
| 11 | -1.072088 | -1.751515 | -2.650509 | 0.137912 | -0.819666 | 2.801726 | 0.037222 | -0.301054 | -2.026159 | -1.763036 | ... | 0.109839 | 0.410451 | 2.199298 | -2.823603 | -2.512597 | 1.982060 | -1.072088 | 44.254909 | 0.000019 | False |
| 12 | -2.395338 | 0.558286 | 2.382018 | -2.462393 | 2.265589 | -1.847598 | -1.440349 | -1.063999 | -1.855712 | 2.951158 | ... | -2.786359 | 0.997192 | -1.460859 | -1.837051 | -0.080375 | 0.417991 | -2.395338 | 58.651507 | 0.000016 | False |
| 13 | 1.415236 | -2.492701 | -1.334565 | 2.421728 | 0.888129 | 2.397188 | -1.707710 | 2.544272 | -0.261251 | 1.434785 | ... | -2.765691 | 2.036665 | -0.532732 | -1.077465 | 1.087163 | 3.285318 | 1.415236 | 52.817253 | 0.000019 | False |
| 14 | -0.157974 | -0.043113 | -0.955338 | 2.539707 | -2.231660 | -2.777834 | -0.096231 | -1.133902 | 2.922451 | 0.178956 | ... | -0.740648 | -0.329391 | 1.672301 | -0.201087 | 1.584370 | -5.009494 | -0.157974 | 51.248912 | 0.000015 | False |
| 15 | 0.510391 | 1.928369 | 2.033205 | -0.582756 | -1.786856 | -0.057517 | 2.016938 | -2.416222 | 2.151468 | -0.436532 | ... | -1.906637 | 0.118334 | 1.950391 | 2.438760 | 1.450449 | -1.844373 | 0.510391 | 44.566776 | 0.000015 | False |
| 16 | 1.831307 | 0.261976 | -0.846258 | -2.120677 | -0.288012 | 1.474156 | -2.717937 | -0.585148 | -2.597342 | -1.571646 | ... | -1.958282 | -2.573681 | -1.988644 | 2.093284 | -2.966935 | 1.186145 | 1.831307 | 51.207547 | 0.000015 | False |
| 17 | 0.924571 | -2.906889 | 2.505877 | -0.057980 | -1.898340 | 2.575779 | -0.587287 | 0.292420 | 2.252874 | -0.708580 | ... | -1.954753 | 1.161425 | 0.976055 | -1.982318 | 2.447897 | 0.677439 | 0.924571 | 39.779383 | 0.000015 | False |
| 18 | 0.029757 | 2.490044 | 0.421286 | -0.319254 | -1.782242 | 1.246140 | -2.932972 | 0.413711 | 2.195711 | 2.422576 | ... | -2.702444 | 1.997469 | 1.840539 | 2.519801 | 0.102031 | -0.536102 | 0.029757 | 54.559911 | 0.000014 | False |
| 19 | -0.965711 | 1.479641 | -1.976076 | 1.307773 | -1.052445 | -0.586139 | 1.560118 | -2.803489 | -1.838699 | -2.585415 | ... | 2.108590 | -1.136846 | -1.991490 | 0.513929 | -0.668303 | -1.638584 | -0.965711 | 42.678710 | 0.000014 | False |
20 rows × 23 columns
Run the optimization
We run the two optimizers side by side to compare speed. To save time, we add step_size randomly sampled points (2 on CPU, 20 on GPU) between optimization steps instead of generating every point with the optimizer.
In [4]:
history = []
for i in range(50):
    torch.cuda.empty_cache()
    X.random_evaluate(step_size)

    # sync data so both optimizers train on the identical dataset
    X2.data = X.data.copy()
    X2.generator.data = X.generator.data.copy()

    t1 = time.perf_counter()
    X.step()
    t2 = time.perf_counter()
    X2.step()
    t3 = time.perf_counter()

    if i % 10 == 0:
        print(f"Step {i}")

    history.append(
        {
            "n": len(X.data),
            "Standard training": X.generator.computation_time["training"].to_numpy()[-1],  # ~t2 - t1
            "Standard acquisition": X.generator.computation_time["acquisition_optimization"].to_numpy()[-1],
            "Batched training": X2.generator.computation_time["training"].to_numpy()[-1],  # ~t3 - t2
            "Batched acquisition": X2.generator.computation_time["acquisition_optimization"].to_numpy()[-1],
        }
    )
Step 0
Step 10
Step 20
Step 30
Step 40
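Note that the raw perf_counter deltas (t2 - t1 and t3 - t2) measure the full step including Xopt bookkeeping, while generator.computation_time separates the time spent on model training from acquisition-function optimization, which is what we record in history. Assuming computation_time is a pandas DataFrame (as the column indexing above suggests), the most recent entries can be inspected directly:

print(X.generator.computation_time.tail())
print(X2.generator.computation_time.tail())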
Plot performance
Let's plot the timings.
In [5]:
history_df = pd.DataFrame(history)

fig, ax = plt.subplots(1, 1)
ax.plot(history_df["n"], history_df["Standard training"], label="Standard training")
ax.plot(history_df["n"], history_df["Batched training"], label="Batched training")
ax.set_ylabel("Time (s)")
ax.set_xlabel("Number of samples")
ax.legend()
ax.set_title(
    f"Vars: {len(vocs['variables'])}, Objs: {len(vocs['objectives'])}, Cons: {len(vocs['constraints'])}, GPU: {generator.use_cuda}"
)
Out[5]:
Text(0.5, 1.0, 'Vars: 16, Objs: 1, Cons: 3, GPU: False')
In [6]:
fig, ax = plt.subplots(1, 1)
ax.plot(
    history_df["n"],
    history_df["Standard acquisition"],
    label="Standard acquisition",
)
ax.plot(history_df["n"], history_df["Batched acquisition"], label="Batched acquisition")
ax.set_ylabel("Time (s)")
ax.set_xlabel("Number of samples")
ax.legend()
Out[6]:
<matplotlib.legend.Legend at 0x7f8124491ee0>
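For a quick numerical summary, the per-step speedup ratios can be computed from the history_df built above:

speedup_train = history_df["Standard training"] / history_df["Batched training"]
speedup_acq = history_df["Standard acquisition"] / history_df["Batched acquisition"]
print(f"Median training speedup: {speedup_train.median():.2f}x")
print(f"Median acquisition speedup: {speedup_acq.median():.2f}x")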