Batched model tutorial
In this tutorial we demonstrate that, for problems involving more than one output (objectives and constraints) where every sample is complete (no NaNs in any output), a significant speedup can be achieved by using a batched model. On a GPU this typically yields a 2-3x speedup.
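To see why batching helps: instead of fitting a separate GP model per output (one for the objective and one per constraint), a batched model stacks all outputs into a single GP whose kernel and likelihood computations run as one vectorized batch. As a minimal sketch of the idea, using BoTorch directly rather than Xopt's internal implementation (the shapes below are illustrative):

import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.mlls import ExactMarginalLogLikelihood

train_X = torch.rand(32, 16, dtype=torch.double)  # 32 samples of 16 variables
train_Y = torch.randn(32, 4, dtype=torch.double)  # 4 outputs: objective + 3 constraints

# SingleTaskGP treats the 4 columns of train_Y as a batch of independent
# outputs, so hyperparameter training touches all of them in one vectorized call
batched_gp = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(batched_gp.likelihood, batched_gp)
fit_gpytorch_mll(mll)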
In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import pandas as pd
import threadpoolctl
import torch

from xopt import Xopt
from xopt.evaluator import Evaluator
from xopt.generators.bayesian import ExpectedImprovementGenerator
from xopt.generators.bayesian.models.standard import (
    BatchedModelConstructor,
    StandardModelConstructor,
)
from xopt.numerical_optimizer import LBFGSOptimizer
from xopt.resources.test_functions.rosenbrock import evaluate_rosenbrock

HAS_CUDA = False
step_size = 2
if torch.cuda.is_available():
    step_size = 20
    HAS_CUDA = True

# Limit CPU threads so the timing comparison is not skewed by BLAS/OpenMP parallelism
torch.set_num_threads(1)
threadpoolctl.threadpool_limits(limits=1, user_api="blas")
threadpoolctl.threadpool_limits(limits=1, user_api="openmp")

vocs = {
    "variables": {f"x{i}": [-3, 3] for i in range(16)},
    "objectives": {"y": "MINIMIZE"},
    "constraints": {
        "c1": ["GREATER_THAN", 0.1],
        "c2": ["LESS_THAN", 3],
        "c3": ["GREATER_THAN", 0],
    },
}


def eval_f(input_dict):
    rosenbrock = False  # set True to use the Rosenbrock test function instead
    d = {
        "y2": input_dict["x0"] + input_dict["x1"],
        "c1": input_dict["x2"] + input_dict["x3"],
        "c2": input_dict["x4"] + input_dict["x5"],
        "c3": input_dict["x0"],
    }
    if rosenbrock:
        d["y"] = evaluate_rosenbrock(input_dict)["y"]
    else:
        # noisy 16-dimensional sphere function
        d["y"] = (
            np.sum(np.array([input_dict[f"x{i}"] ** 2 for i in range(16)]))
            + np.random.randn() * 0.01
        )
    return d
In [2]:
evaluator = Evaluator(function=eval_f)

# standard setup: independent GP models, one per output
generator = ExpectedImprovementGenerator(
    vocs=vocs,
    gp_constructor=StandardModelConstructor(train_method="adam"),
    numerical_optimizer=LBFGSOptimizer(n_restarts=5),
    use_cuda=HAS_CUDA,
)
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)

# batched setup: all outputs share a single batched GP model
generator_batched = ExpectedImprovementGenerator(
    vocs=vocs,
    gp_constructor=BatchedModelConstructor(train_method="adam"),
    numerical_optimizer=LBFGSOptimizer(n_restarts=5),
    use_cuda=HAS_CUDA,
)
X2 = Xopt(evaluator=evaluator, generator=generator_batched, vocs=vocs)
In [3]:
# evaluate 20 random initial points with each optimizer
X.random_evaluate(20)
X2.random_evaluate(20)
Out[3]:
| | x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | ... | x13 | x14 | x15 | y2 | c1 | c2 | c3 | y | xopt_runtime | xopt_error |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.629771 | -1.529682 | 1.718415 | -2.260491 | 2.963790 | 0.975458 | -1.445952 | -1.403649 | 2.822854 | 1.834528 | ... | -0.990638 | 2.254874 | -1.928286 | -2.159452 | -0.542076 | 3.939248 | -0.629771 | 56.040175 | 0.000044 | False |
| 1 | -2.065522 | 2.471455 | -2.576183 | -1.701610 | 0.188843 | -0.663712 | -2.077991 | -0.792385 | -2.952899 | -1.510485 | ... | -0.305860 | 0.427607 | -2.680592 | 0.405933 | -4.277792 | -0.474869 | -2.065522 | 47.937440 | 0.000026 | False |
| 2 | -0.966346 | -1.981250 | 0.972131 | -0.902440 | -1.745697 | 0.852876 | 2.167262 | 0.387109 | -1.926389 | 2.889878 | ... | -2.152611 | -2.577891 | -0.904383 | -2.947596 | 0.069691 | -0.892820 | -0.966346 | 46.673252 | 0.000018 | False |
| 3 | 1.403526 | 1.400349 | -0.200041 | 0.193738 | -1.643334 | 2.947927 | 1.500362 | 1.998573 | -0.363381 | 0.164230 | ... | 0.739102 | 1.810546 | 0.947885 | 2.803875 | -0.006304 | 1.304593 | 1.403526 | 27.211103 | 0.000017 | False |
| 4 | 2.421217 | -2.170732 | 0.649781 | -1.647925 | 2.918195 | -1.251224 | -2.168905 | 0.601780 | -2.832474 | 0.369364 | ... | -1.578308 | 1.017236 | 0.813045 | 0.250485 | -0.998144 | 1.666971 | 2.421217 | 61.753942 | 0.000016 | False |
| 5 | -2.325302 | -2.134769 | -2.252290 | -1.614489 | -0.648646 | -2.390945 | 1.408011 | -2.685488 | -2.113125 | -0.526007 | ... | -1.639817 | 0.044242 | 1.898319 | -4.460072 | -3.866779 | -3.039591 | -2.325302 | 45.848722 | 0.000015 | False |
| 6 | -1.189527 | 2.476465 | 0.816846 | -1.376397 | -2.460270 | 1.745539 | 0.246297 | 0.996389 | -2.626013 | 0.262975 | ... | -1.336137 | -2.180613 | 1.898827 | 1.286939 | -0.559551 | -0.714731 | -1.189527 | 42.038339 | 0.000016 | False |
| 7 | 0.390989 | -1.539833 | 1.527494 | -0.042535 | 0.962704 | -0.517070 | -1.639343 | -1.876162 | -2.392006 | 2.224896 | ... | 1.313645 | 0.642401 | 1.207771 | -1.148844 | 1.484959 | 0.445635 | 0.390989 | 35.057813 | 0.000016 | False |
| 8 | -0.982868 | 1.230035 | -0.332712 | 1.284289 | -0.334786 | -0.561378 | 0.359927 | -2.900761 | 2.871355 | 2.500537 | ... | -2.490454 | -0.030497 | -1.536240 | 0.247167 | 0.951577 | -0.896163 | -0.982868 | 43.878148 | 0.000015 | False |
| 9 | 0.184043 | 2.731321 | 1.787637 | -2.625602 | 2.100862 | -0.808530 | 1.411866 | 0.390989 | -1.914743 | -0.694626 | ... | -0.883298 | -2.600023 | 0.645353 | 2.915364 | -0.837965 | 1.292332 | 0.184043 | 50.488743 | 0.000014 | False |
| 10 | -2.762480 | -0.013872 | 2.083811 | 1.816921 | 0.564897 | 1.745714 | -2.423665 | -2.735492 | -2.792730 | -0.233024 | ... | 0.144091 | -0.718396 | 2.051423 | -2.776352 | 3.900731 | 2.310611 | -2.762480 | 48.283709 | 0.000033 | False |
| 11 | -1.072088 | -1.751515 | -2.650509 | 0.137912 | -0.819666 | 2.801726 | 0.037222 | -0.301054 | -2.026159 | -1.763036 | ... | 0.109839 | 0.410451 | 2.199298 | -2.823603 | -2.512597 | 1.982060 | -1.072088 | 44.254909 | 0.000019 | False |
| 12 | -2.395338 | 0.558286 | 2.382018 | -2.462393 | 2.265589 | -1.847598 | -1.440349 | -1.063999 | -1.855712 | 2.951158 | ... | -2.786359 | 0.997192 | -1.460859 | -1.837051 | -0.080375 | 0.417991 | -2.395338 | 58.651507 | 0.000016 | False |
| 13 | 1.415236 | -2.492701 | -1.334565 | 2.421728 | 0.888129 | 2.397188 | -1.707710 | 2.544272 | -0.261251 | 1.434785 | ... | -2.765691 | 2.036665 | -0.532732 | -1.077465 | 1.087163 | 3.285318 | 1.415236 | 52.817253 | 0.000019 | False |
| 14 | -0.157974 | -0.043113 | -0.955338 | 2.539707 | -2.231660 | -2.777834 | -0.096231 | -1.133902 | 2.922451 | 0.178956 | ... | -0.740648 | -0.329391 | 1.672301 | -0.201087 | 1.584370 | -5.009494 | -0.157974 | 51.248912 | 0.000015 | False |
| 15 | 0.510391 | 1.928369 | 2.033205 | -0.582756 | -1.786856 | -0.057517 | 2.016938 | -2.416222 | 2.151468 | -0.436532 | ... | -1.906637 | 0.118334 | 1.950391 | 2.438760 | 1.450449 | -1.844373 | 0.510391 | 44.566776 | 0.000015 | False |
| 16 | 1.831307 | 0.261976 | -0.846258 | -2.120677 | -0.288012 | 1.474156 | -2.717937 | -0.585148 | -2.597342 | -1.571646 | ... | -1.958282 | -2.573681 | -1.988644 | 2.093284 | -2.966935 | 1.186145 | 1.831307 | 51.207547 | 0.000015 | False |
| 17 | 0.924571 | -2.906889 | 2.505877 | -0.057980 | -1.898340 | 2.575779 | -0.587287 | 0.292420 | 2.252874 | -0.708580 | ... | -1.954753 | 1.161425 | 0.976055 | -1.982318 | 2.447897 | 0.677439 | 0.924571 | 39.779383 | 0.000015 | False |
| 18 | 0.029757 | 2.490044 | 0.421286 | -0.319254 | -1.782242 | 1.246140 | -2.932972 | 0.413711 | 2.195711 | 2.422576 | ... | -2.702444 | 1.997469 | 1.840539 | 2.519801 | 0.102031 | -0.536102 | 0.029757 | 54.559911 | 0.000014 | False |
| 19 | -0.965711 | 1.479641 | -1.976076 | 1.307773 | -1.052445 | -0.586139 | 1.560118 | -2.803489 | -1.838699 | -2.585415 | ... | 2.108590 | -1.136846 | -1.991490 | 0.513929 | -0.668303 | -1.638584 | -0.965711 | 42.678710 | 0.000014 | False |
20 rows × 23 columns
Run the optimization
We run the two optimizers side by side to compare speed. To save time, we add step_size randomly sampled points (2 on CPU, 20 on GPU) between optimization steps instead of generating every point with the optimizer.
In [4]:
history = []
for i in range(50):
    torch.cuda.empty_cache()
    X.random_evaluate(step_size)

    # sync data so both optimizers train on the identical dataset
    X2.data = X.data.copy()
    X2.generator.data = X.generator.data.copy()

    t1 = time.perf_counter()
    X.step()
    t2 = time.perf_counter()
    X2.step()
    t3 = time.perf_counter()

    if i % 10 == 0:
        print(f"Step {i}")

    history.append(
        {
            "n": len(X.data),
            "Standard training": X.generator.computation_time["training"].to_numpy()[-1],  # ~t2 - t1
            "Standard acquisition": X.generator.computation_time["acquisition_optimization"].to_numpy()[-1],
            "Batched training": X2.generator.computation_time["training"].to_numpy()[-1],  # ~t3 - t2
            "Batched acquisition": X2.generator.computation_time["acquisition_optimization"].to_numpy()[-1],
        }
    )
Step 0
Step 10
Step 20
Step 30
Step 40
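Note that the raw perf_counter deltas (t2 - t1 and t3 - t2) measure the full step including Xopt bookkeeping, while generator.computation_time separates the time spent on model training from acquisition-function optimization, which is what we record in history. Assuming computation_time is a pandas DataFrame (as the column indexing above suggests), the most recent entries can be inspected directly:

print(X.generator.computation_time.tail())
print(X2.generator.computation_time.tail())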
Plot performance
Let's plot the timings.
In [5]:
history_df = pd.DataFrame(history)

fig, ax = plt.subplots(1, 1)
ax.plot(history_df["n"], history_df["Standard training"], label="Standard training")
ax.plot(history_df["n"], history_df["Batched training"], label="Batched training")
ax.set_ylabel("Time (s)")
ax.set_xlabel("Number of samples")
ax.legend()
ax.set_title(
    f"Vars: {len(vocs['variables'])}, Objs: {len(vocs['objectives'])}, Cons: {len(vocs['constraints'])}, GPU: {generator.use_cuda}"
)
Out[5]:
Text(0.5, 1.0, 'Vars: 16, Objs: 1, Cons: 3, GPU: False')
In [6]:
fig, ax = plt.subplots(1, 1)
ax.plot(
    history_df["n"],
    history_df["Standard acquisition"],
    label="Standard acquisition",
)
ax.plot(history_df["n"], history_df["Batched acquisition"], label="Batched acquisition")
ax.set_ylabel("Time (s)")
ax.set_xlabel("Number of samples")
ax.legend()
Out[6]:
<matplotlib.legend.Legend at 0x7f8124491ee0>
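For a quick numerical summary, the per-step speedup ratios can be computed from the history_df built above:

speedup_train = history_df["Standard training"] / history_df["Batched training"]
speedup_acq = history_df["Standard acquisition"] / history_df["Batched acquisition"]
print(f"Median training speedup: {speedup_train.median():.2f}x")
print(f"Median acquisition speedup: {speedup_acq.median():.2f}x")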