NSGA2 Generator¶

This notebook demonstrates the use of the generator NSGA2Generator which implements the NSGA-II algorithm from [1]. We show how to set up the optimizer object, use it to solve a test problem, and show off some of the generator's output features. Running this notebook will generate files in a temporary directory on your computer. These files will be cleaned up at the end.

In [1]:

Copied!





import json
import logging
import matplotlib.pyplot as plt
import os
import pandas as pd
import shutil
import tempfile

from xopt.generators.ga.nsga2 import (
    NSGA2Generator,
    PolynomialMutation,
    SimulatedBinaryCrossover,
)
from xopt.resources.test_functions.zdt import construct_zdt
from xopt import Xopt, Evaluator, VOCS
import json
import logging
import matplotlib.pyplot as plt
import os
import pandas as pd
import shutil
import tempfile

from xopt.generators.ga.nsga2 import (
    NSGA2Generator,
    PolynomialMutation,
    SimulatedBinaryCrossover,
)
from xopt.resources.test_functions.zdt import construct_zdt
from xopt import Xopt, Evaluator, VOCS

In [2]:

Copied!





# The NSGA2Generator object emits logger messages
# Configure the logging module to output them to console
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Format for log messages
    datefmt="%Y-%m-%d %H:%M:%S",  # Date/time format
)
# The NSGA2Generator object emits logger messages
# Configure the logging module to output them to console
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Format for log messages
    datefmt="%Y-%m-%d %H:%M:%S",  # Date/time format
)

In [3]:

Copied!

# Get the problem ZDT2 and create an Xopt evaluator
prob_vocs, prob_fun, _ = construct_zdt(30, 3)
ev = Evaluator(function=prob_fun)
# Get the problem ZDT2 and create an Xopt evaluator
prob_vocs, prob_fun, _ = construct_zdt(30, 3)
ev = Evaluator(function=prob_fun)

Generator Setup and Use¶

First we create the NSGA2Generator object, demonstrate some of its settings, and then use it to solve the ZDT3 test problem.

In [4]:

Copied!





# Create the NSGA2 generator with default settings
generator = NSGA2Generator(
    vocs=prob_vocs,  # Must provide the problem's details
)

# Let's demonstrate controlling the generator's hyperparameters and settings
generator = NSGA2Generator(
    vocs=prob_vocs,  # Must provide the problem's details
    population_size=50,  # How many individuals in a population
    mutation_operator=PolynomialMutation(
        pm=1 / 30, eta_m=20
    ),  # Can manually specify mutation operator and hyperparameters
    crossover_operator=SimulatedBinaryCrossover(
        delta_1=0.5, delta_2=0.5, eta_c=20
    ),  # Similarly can specify crossover manually
    deduplicate_output=True,  # Performs deduplication of candidate individuals. Ensure unique outputs from generator.
)
# Create the NSGA2 generator with default settings
generator = NSGA2Generator(
    vocs=prob_vocs,  # Must provide the problem's details
)

# Let's demonstrate controlling the generator's hyperparameters and settings
generator = NSGA2Generator(
    vocs=prob_vocs,  # Must provide the problem's details
    population_size=50,  # How many individuals in a population
    mutation_operator=PolynomialMutation(
        pm=1 / 30, eta_m=20
    ),  # Can manually specify mutation operator and hyperparameters
    crossover_operator=SimulatedBinaryCrossover(
        delta_1=0.5, delta_2=0.5, eta_c=20
    ),  # Similarly can specify crossover manually
    deduplicate_output=True,  # Performs deduplication of candidate individuals. Ensure unique outputs from generator.
)

2025-05-30 16:03:16 - xopt.generator - INFO - Initialized generator nsga2

2025-05-30 16:03:16 - xopt.generator - INFO - Initialized generator nsga2

In [5]:

Copied!





# Run the optimizer for a few generations. Notice log output printed below this cell
ev.max_workers = generator.population_size
X = Xopt(generator=generator, evaluator=ev, vocs=prob_vocs)

for _ in range(3):
    X.step()
# Run the optimizer for a few generations. Notice log output printed below this cell
ev.max_workers = generator.population_size
X = Xopt(generator=generator, evaluator=ev, vocs=prob_vocs)

for _ in range(3):
    X.step()

2025-05-30 16:03:16 - xopt.base - INFO - Running Xopt step

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - generated 50 candidates in 3.34ms (removed 0 duplicate individuals)

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - adding 50 new evaluated individuals to generator

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - completed generation 1 in 0.022s (n_feasible=0, n_err=0, children_performance=0.000s (0.000s), add_data_round=0, fevals=50, n_candidates=50)

2025-05-30 16:03:16 - xopt.base - INFO - Running Xopt step

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - generated 50 candidates in 16.55ms (removed 0 duplicate individuals)

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - adding 50 new evaluated individuals to generator

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - completed generation 2 in 0.031s (n_feasible=0, n_err=0, children_performance=0.000s (0.000s), add_data_round=0, fevals=100, n_candidates=100)

2025-05-30 16:03:16 - xopt.base - INFO - Running Xopt step

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - generated 50 candidates in 16.51ms (removed 0 duplicate individuals)

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - adding 50 new evaluated individuals to generator

2025-05-30 16:03:16 - xopt.generators.ga.nsga2.NSGA2Generator.140602176003584 - INFO - completed generation 3 in 0.030s (n_feasible=0, n_err=0, children_performance=0.000s (0.000s), add_data_round=0, fevals=150, n_candidates=150)

Exploring Optimizer Output¶

We now take a look at the metadata associated with the optimizer run as well as its output.

In [6]:

Copied!





%%time
# Run for longer with log output turned off
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

for _ in range(47):
    X.step()
%%time
# Run for longer with log output turned off
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

for _ in range(47):
    X.step()

CPU times: user 1.34 s, sys: 7.32 ms, total: 1.34 s
Wall time: 1.34 s

In [7]:

Copied!





# Inspect generator properties
print(
    f"Saw {generator.fevals} function evaluations"
)  # Number of function evaluations returned to generator
print(
    f"Completed {generator.n_generations} generations"
)  # Number of generations finished
print(
    f"Generated {generator.n_candidates} candidate solutions"
)  # Number of individuals generated
# Inspect generator properties
print(
    f"Saw {generator.fevals} function evaluations"
)  # Number of function evaluations returned to generator
print(
    f"Completed {generator.n_generations} generations"
)  # Number of generations finished
print(
    f"Generated {generator.n_candidates} candidate solutions"
)  # Number of individuals generated

Saw 0 function evaluations
Completed 0 generations
Generated 0 candidate solutions

In [8]:

Copied!





# All evaluations are stored in the following Dataframe. Speific to this generator, a `xopt_parent_generation` and
# `xopt_candidate_idx` columns which indicate from which generation the indvidual's parents belong to as well as providing a
# unique index for indviduals.
#
# NOTE: The data DataFrame is not stored when serializing the generator. It must be saved on its own for later use.
X.generator.data.head()
# All evaluations are stored in the following Dataframe. Speific to this generator, a `xopt_parent_generation` and
# `xopt_candidate_idx` columns which indicate from which generation the indvidual's parents belong to as well as providing a
# unique index for indviduals.
#
# NOTE: The data DataFrame is not stored when serializing the generator. It must be saved on its own for later use.
X.generator.data.head()

Out[8]:

	x1	x10	x11	x12	x13	x14	x15	x16	x17	x18	...	x7	x8	x9	xopt_candidate_idx	f1	f2	g	xopt_runtime	xopt_error
0	0.012569	0.476055	0.335520	0.124207	0.054812	0.959095	0.774935	0.362657	0.767392	0.012418	...	0.271398	0.407106	0.056628	28	0.012569	4.538309	4.788475	0.000041	False
1	0.012925	0.425204	0.348392	0.964047	0.990236	0.979217	0.864878	0.370094	0.397858	0.685413	...	0.827190	0.405617	0.031834	4	0.012925	5.641480	5.923277	0.000020	False
2	0.050028	0.496630	0.379873	0.979644	0.894721	0.122005	0.752557	0.298314	0.038280	0.845869	...	0.802374	0.289322	0.852608	1	0.050028	5.238783	5.828817	0.000016	False
3	0.054737	0.618905	0.730224	0.133943	0.063528	0.402270	0.307045	0.340946	0.378805	0.533254	...	0.612932	0.853343	0.144090	7	0.054737	4.387877	4.963232	0.000014	False
4	0.076123	0.638767	0.077552	0.258307	0.800304	0.756873	0.906303	0.401599	0.866955	0.645431	...	0.994416	0.733286	0.390329	26	0.076123	5.437064	6.174542	0.000014	False

5 rows × 37 columns

In [9]:

Copied!





# Each population the optimizer has seen is stored by the unique indices of each individual.
print(X.generator.history_idx[-1][:16])  # Show the first few indices of last generation

# If you have the data DataFrame you can extract all variables, objectives, constraints for each population
# Get a DataFrame of all information for the first population with every row being an individual
X.generator.data[
    X.generator.data["xopt_candidate_idx"].isin(X.generator.history_idx[0])
].head()
# Each population the optimizer has seen is stored by the unique indices of each individual.
print(X.generator.history_idx[-1][:16])  # Show the first few indices of last generation

# If you have the data DataFrame you can extract all variables, objectives, constraints for each population
# Get a DataFrame of all information for the first population with every row being an individual
X.generator.data[
    X.generator.data["xopt_candidate_idx"].isin(X.generator.history_idx[0])
].head()

[2453, 2489, 2486, 2448, 2223, 2397, 2262, 2426, 2328, 2482, 2281, 2439, 2435, 2485, 2483, 2356]

Out[9]:

	x1	x10	x11	x12	x13	x14	x15	x16	x17	x18	...	x7	x8	x9	xopt_candidate_idx	f1	f2	g	xopt_runtime	xopt_error
0	0.012569	0.476055	0.335520	0.124207	0.054812	0.959095	0.774935	0.362657	0.767392	0.012418	...	0.271398	0.407106	0.056628	28	0.012569	4.538309	4.788475	0.000041	False
1	0.012925	0.425204	0.348392	0.964047	0.990236	0.979217	0.864878	0.370094	0.397858	0.685413	...	0.827190	0.405617	0.031834	4	0.012925	5.641480	5.923277	0.000020	False
2	0.050028	0.496630	0.379873	0.979644	0.894721	0.122005	0.752557	0.298314	0.038280	0.845869	...	0.802374	0.289322	0.852608	1	0.050028	5.238783	5.828817	0.000016	False
3	0.054737	0.618905	0.730224	0.133943	0.063528	0.402270	0.307045	0.340946	0.378805	0.533254	...	0.612932	0.853343	0.144090	7	0.054737	4.387877	4.963232	0.000014	False
4	0.076123	0.638767	0.077552	0.258307	0.800304	0.756873	0.906303	0.401599	0.866955	0.645431	...	0.994416	0.733286	0.390329	26	0.076123	5.437064	6.174542	0.000014	False

5 rows × 37 columns

In [10]:

Copied!





# Using the population records we can plot the final generation's objective functions
final_pop = X.generator.data[
    X.generator.data["xopt_candidate_idx"].isin(X.generator.history_idx[-1])
]
plt.scatter(final_pop["f1"], final_pop["f2"])
plt.xlabel("f1")
plt.ylabel("f2")
plt.title(f"ZDT3, Generation {X.generator.n_generations}")
# Using the population records we can plot the final generation's objective functions
final_pop = X.generator.data[
    X.generator.data["xopt_candidate_idx"].isin(X.generator.history_idx[-1])
]
plt.scatter(final_pop["f1"], final_pop["f2"])
plt.xlabel("f1")
plt.ylabel("f2")
plt.title(f"ZDT3, Generation {X.generator.n_generations}")

Out[10]:

Text(0.5, 1.0, 'ZDT3, Generation 50')

No description has been provided for this image

File Output from Generator¶

In this section, we will take a look at the files which can be automatically written by the optimizer. We will create a temporary directory and clean it up at the end of this notebook.

The output files are the following.

data.csv: All data evaluated during the optimization
vocs.txt: The VOCS object so that the objectives, constraints, decision variables are retained alongside the data
populations.csv: Each population is written here with a column xopt_generation to distinguish which generation the row belongs to
checkpoints: This generator periodically saves its full state to timestamped files in this directory
log.txt: Log output from the generator is recorded to this file

Wile running each of the blocks, go ahead and open the temporary directory printed from the next cell and take a look at the files for yourself.

In [11]:

Copied!

# Setup our output directory
output_dir = tempfile.mkdtemp()
print(f'Will write data to "{output_dir}"')
# Setup our output directory
output_dir = tempfile.mkdtemp()
print(f'Will write data to "{output_dir}"')

Will write data to "/tmp/tmpxucrj7kw"

In [12]:

Copied!





# Set up a generator configured to output data
generator = NSGA2Generator(
    vocs=prob_vocs,
    output_dir=output_dir,  # Where will we write data
    checkpoint_freq=8,  # Turn on checkpoints and set how often (in terms of generations) we save the optimizer state
    log_level=logging.DEBUG,  # Set the level of log messages output to our log file
)

# Run it for a couple of generations
ev.max_workers = generator.population_size
X = Xopt(generator=generator, evaluator=ev, vocs=prob_vocs)
for _ in range(32):
    X.step()
# Set up a generator configured to output data
generator = NSGA2Generator(
    vocs=prob_vocs,
    output_dir=output_dir,  # Where will we write data
    checkpoint_freq=8,  # Turn on checkpoints and set how often (in terms of generations) we save the optimizer state
    log_level=logging.DEBUG,  # Set the level of log messages output to our log file
)

# Run it for a couple of generations
ev.max_workers = generator.population_size
X = Xopt(generator=generator, evaluator=ev, vocs=prob_vocs)
for _ in range(32):
    X.step()

In [13]:

Copied!





# Check out the generated files
print(f'"{output_dir}": {os.listdir(output_dir)}')
checkpoint_dir = os.path.join(output_dir, "checkpoints")
print(f'"checkpoints": {os.listdir(checkpoint_dir)}')
# Check out the generated files
print(f'"{output_dir}": {os.listdir(output_dir)}')
checkpoint_dir = os.path.join(output_dir, "checkpoints")
print(f'"checkpoints": {os.listdir(checkpoint_dir)}')

"/tmp/tmpxucrj7kw": ['populations.csv', 'vocs.txt', 'data.csv', 'log.txt', 'checkpoints']
"checkpoints": ['20250530_160319_1.txt', '20250530_160320_1.txt', '20250530_160319_2.txt', '20250530_160318_1.txt']

In [14]:

Copied!





# In the event data was already written to `output_dir` the generator will choose a new path with a numeric suffix
# to avoid overwriting anything.
X = Xopt(
    generator=NSGA2Generator(vocs=prob_vocs, output_dir=output_dir),
    evaluator=ev,
    vocs=prob_vocs,
)
for _ in range(32):
    X.step()

# Compare the requested path and where the data ended up
print(f'Requested path: "{output_dir}"')
print(f'Path used:      "{X.generator.output_dir}"')

# Clean up the directory
X.generator.close_log_file()
# In the event data was already written to `output_dir` the generator will choose a new path with a numeric suffix
# to avoid overwriting anything.
X = Xopt(
    generator=NSGA2Generator(vocs=prob_vocs, output_dir=output_dir),
    evaluator=ev,
    vocs=prob_vocs,
)
for _ in range(32):
    X.step()

# Compare the requested path and where the data ended up
print(f'Requested path: "{output_dir}"')
print(f'Path used:      "{X.generator.output_dir}"')

# Clean up the directory
X.generator.close_log_file()

Requested path: "/tmp/tmpxucrj7kw"
Path used:      "/tmp/tmpxucrj7kw_2"

In [15]:

Copied!

# Load all data back in
df = pd.read_csv(os.path.join(X.generator.output_dir, "data.csv"))
df.head()
# Load all data back in
df = pd.read_csv(os.path.join(X.generator.output_dir, "data.csv"))
df.head()

Out[15]:

	x1	x10	x11	x12	x13	x14	x15	x16	x17	x18	...	x7	x8	x9	xopt_candidate_idx	f1	f2	g	xopt_runtime	xopt_error
0	0.031420	0.308089	0.766190	0.824291	0.676276	0.432043	0.474503	0.854264	0.464387	0.621621	...	0.347205	0.039111	0.588284	37	0.031420	5.227840	5.676371	0.000035	False
1	0.077498	0.603628	0.386426	0.291381	0.631599	0.703805	0.324760	0.106815	0.367433	0.244792	...	0.375737	0.215403	0.370943	15	0.077498	4.278654	4.948247	0.000017	False
2	0.080987	0.715079	0.013213	0.150883	0.917801	0.563331	0.265194	0.640450	0.188675	0.193972	...	0.611297	0.063715	0.518636	6	0.080987	4.710976	5.418996	0.000014	False
3	0.086171	0.129628	0.123349	0.111159	0.028981	0.806938	0.059500	0.565446	0.987309	0.814399	...	0.678086	0.272904	0.774918	47	0.086171	4.722921	5.444118	0.000013	False
4	0.134838	0.747486	0.368540	0.572957	0.988475	0.281245	0.546621	0.117454	0.906214	0.631666	...	0.276204	0.298123	0.037875	24	0.134838	5.084805	5.853378	0.000012	False

5 rows × 37 columns

In [16]:

Copied!





# Read the VOCS object back in. This can be used for data analysis / restarting optimizations
with open(os.path.join(X.generator.output_dir, "vocs.txt")) as f:
    vocs_from_file = VOCS.from_dict(json.load(f))

# Show the objectives
vocs_from_file.objectives
# Read the VOCS object back in. This can be used for data analysis / restarting optimizations
with open(os.path.join(X.generator.output_dir, "vocs.txt")) as f:
    vocs_from_file = VOCS.from_dict(json.load(f))

# Show the objectives
vocs_from_file.objectives

Out[16]:

{'f1': 'MINIMIZE', 'f2': 'MINIMIZE'}

In [17]:

Copied!





# Load the populations and get just the last population
df = pd.read_csv(os.path.join(X.generator.output_dir, "populations.csv"))
last_pop = df[df["xopt_generation"] == df["xopt_generation"].max()]
last_pop.head()
# Load the populations and get just the last population
df = pd.read_csv(os.path.join(X.generator.output_dir, "populations.csv"))
last_pop = df[df["xopt_generation"] == df["xopt_generation"].max()]
last_pop.head()

Out[17]:

	x1	x10	x11	x12	x13	x14	x15	x16	x17	x18	...	x8	x9	xopt_parent_generation	xopt_candidate_idx	f1	f2	g	xopt_runtime	xopt_error	xopt_generation
1550	0.828077	0.003505	0.042163	0.175324	0.002997	0.102772	0.077340	0.138130	0.138994	0.019268	...	0.062971	0.368184	27	1350	0.828077	0.172581	2.144506	0.000011	False	32
1551	0.252260	0.004252	0.010522	0.025775	0.045600	0.101881	0.191538	0.149465	0.164312	0.001020	...	0.064204	0.332260	29	1496	0.252260	1.198514	2.194105	0.000012	False	32
1552	0.430407	0.037659	0.063895	0.062252	0.032899	0.076310	0.034385	0.155772	0.139606	0.115737	...	0.061664	0.383426	29	1463	0.430407	0.864135	2.185400	0.000012	False	32
1553	0.191431	0.005232	0.030983	0.082133	0.002585	0.097348	0.073224	0.130621	0.064873	0.013615	...	0.062970	0.272055	30	1536	0.191431	1.510524	2.092515	0.000012	False	32
1554	0.198933	0.030012	0.040216	0.177062	0.002631	0.102832	0.072861	0.143570	0.063284	0.013791	...	0.009575	0.275228	30	1544	0.198933	1.381572	2.006733	0.000012	False	32

5 rows × 38 columns

In [18]:

Copied!





# Reload the optimizer from its last checkpoint
# NOTE: the vocs object is not retained with the checkpoint and should be added as in the following code
last_checkpoint = os.path.join(checkpoint_dir, os.listdir(checkpoint_dir)[-1])
with open(last_checkpoint) as f:
    restored_generator = NSGA2Generator.from_dict({"vocs": prob_vocs, **json.load(f)})
print(f"From file: {restored_generator}")

# Demonstrate using the generator after loading (starting optimization from its last saved point)
X = Xopt(generator=restored_generator, evaluator=ev, vocs=prob_vocs)
for _ in range(32):
    X.step()
print(f"Further optimization: {restored_generator}")

# Clean up the output
X.generator.close_log_file()
shutil.rmtree(X.generator.output_dir)
# Reload the optimizer from its last checkpoint
# NOTE: the vocs object is not retained with the checkpoint and should be added as in the following code
last_checkpoint = os.path.join(checkpoint_dir, os.listdir(checkpoint_dir)[-1])
with open(last_checkpoint) as f:
    restored_generator = NSGA2Generator.from_dict({"vocs": prob_vocs, **json.load(f)})
print(f"From file: {restored_generator}")

# Demonstrate using the generator after loading (starting optimization from its last saved point)
X = Xopt(generator=restored_generator, evaluator=ev, vocs=prob_vocs)
for _ in range(32):
    X.step()
print(f"Further optimization: {restored_generator}")

# Clean up the output
X.generator.close_log_file()
shutil.rmtree(X.generator.output_dir)

From file: NSGA2Generator(pop_size=50, crossover=SimulatedBinaryCrossover, mutation=PolynomialMutation, deduplicated=True, completed_gens=8, fevals=400)

Further optimization: NSGA2Generator(pop_size=50, crossover=SimulatedBinaryCrossover, mutation=PolynomialMutation, deduplicated=True, completed_gens=8, fevals=400)

In [19]:

Copied!

# Clean up the original output
X.generator.close_log_file()
# Clean up the original output
X.generator.close_log_file()

In [20]:

Copied!

shutil.rmtree(output_dir)
shutil.rmtree(output_dir)

In [ ]: