Skip to content

BAX Generator

BaxGenerator

Bases: BayesianGenerator

BAX Generator for Bayesian optimization.

Attributes:

Name Type Description
name str

The name of the generator.

algorithm Algorithm

Algorithm evaluated in the BAX process.

algorithm_results Dict

Dictionary results from the algorithm.

algorithm_results_file str

File name to save algorithm results at every step.

_n_calls int

Internal counter for the number of calls to the generate method.

Methods:

Name Description
validate_turbo_controller

Validate the turbo controller.

validate_vocs

Validate the VOCS object.

generate

Generate a specified number of candidate samples.

_get_acquisition

Get the acquisition function.

Source code in xopt/generators/bayesian/bax_generator.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
class BaxGenerator(BayesianGenerator):
    """
    BAX Generator for Bayesian optimization.

    Attributes
    ----------
    name : str
        The name of the generator.
    algorithm : Algorithm
        Algorithm evaluated in the BAX process.
    algorithm_results : Dict
        Dictionary results from the algorithm (excluded from serialization).
    algorithm_results_file : str
        File name prefix used to save algorithm results at every step.
    _n_calls : int
        Internal counter for the number of calls to the generate method.

    Methods
    -------
    validate_turbo_controller(cls, value, info: ValidationInfo) -> Any
        Validate the turbo controller.
    validate_vocs(cls, v, info: ValidationInfo) -> VOCS
        Validate the VOCS object.
    validate_model_after(self) -> BaxGenerator
        Validate the turbo controller center once the model is assembled.
    generate(self, n_candidates: int) -> List[Dict]
        Generate a specified number of candidate samples.
    _get_acquisition(self, model) -> ModelListExpectedInformationGain
        Get the acquisition function.
    """

    name = "BAX"
    supports_constraints: bool = True
    algorithm: Algorithm = Field(description="algorithm evaluated in the BAX process")
    algorithm_results: Dict = Field(
        None, description="dictionary results from algorithm", exclude=True
    )
    algorithm_results_file: str = Field(
        None, description="file name to save algorithm results at every step"
    )
    _n_calls: int = 0
    _compatible_turbo_controllers = [EntropyTurboController, SafetyTurboController]

    @field_validator("vocs", mode="after")
    def validate_vocs(cls, v, info: ValidationInfo):
        """
        Validate the VOCS object.

        Raises
        ------
        VOCSError
            If constraints are present but not supported, or if the VOCS
            defines any objectives.
        """
        # constraints are only acceptable when the generator supports them
        if v.n_constraints > 0 and not info.data["supports_constraints"]:
            raise VOCSError("this generator does not support constraints")

        # a BAX problem must not define any objectives
        if v.n_objectives != 0:
            raise VOCSError("BAX generator only supports problems with no objectives")

        return v

    @model_validator(mode="after")
    def validate_model_after(self):
        """Validate the turbo controller center (if any) and return the instance."""
        validate_turbo_controller_center(self)
        return self

    def generate(self, n_candidates: int) -> List[Dict]:
        """
        Generate a specified number of candidate samples.

        The internal call counter is incremented before the request is handed
        to the parent implementation.

        Parameters
        ----------
        n_candidates : int
            The number of candidate samples to generate.

        Returns
        -------
        List[Dict]
            A list of dictionaries containing the generated samples.
        """
        self._n_calls = self._n_calls + 1
        candidates = super().generate(n_candidates)
        return candidates

    def _get_acquisition(self, model) -> ModelListExpectedInformationGain:
        """
        Get the acquisition function.

        The resulting algorithm results are stored on the generator and, when
        `algorithm_results_file` is set, pickled to
        ``{algorithm_results_file}_{_n_calls}.pkl``.

        Parameters
        ----------
        model : Model
            The model to use for the acquisition function.

        Returns
        -------
        ModelListExpectedInformationGain
            The acquisition function.
        """
        # positions of the algorithm's observables within the model outputs
        observable_indices = []
        for observable in self.algorithm.observable_names_ordered:
            observable_indices.append(self.vocs.output_names.index(observable))

        observable_model = model.subset_output(observable_indices)

        # a single GP is wrapped so the acquisition always sees a model list
        if isinstance(observable_model, SingleTaskGP):
            observable_model = ModelListGP(observable_model)

        acquisition = ModelListExpectedInformationGain(
            observable_model, self.algorithm, self._get_optimization_bounds()
        )
        self.algorithm_results = acquisition.algorithm_results

        if self.algorithm_results_file is None:
            return acquisition

        # write a copy of the results to a file numbered by the call counter
        snapshot = deepcopy(self.algorithm_results)
        target = f"{self.algorithm_results_file}_{self._n_calls}.pkl"
        with open(target, "wb") as outfile:
            pickle.dump(snapshot, outfile, protocol=pickle.HIGHEST_PROTOCOL)

        return acquisition

model_input_names property

variable names corresponding to trained model

__init__(**kwargs)

Initialize the generator.

Source code in xopt/generator.py
119
120
121
122
123
124
def __init__(self, **kwargs):
    """
    Initialize the generator.

    Parameters
    ----------
    **kwargs
        Keyword arguments forwarded unchanged to the parent initializer.
    """
    super().__init__(**kwargs)
    # hand the name to the logger as an argument so the message is only
    # formatted when the INFO level is actually enabled
    logger.info("Initialized generator %s", self.name)

add_data(new_data)

Add new data to the generator for Bayesian Optimization.

Parameters:

Name Type Description Default
new_data DataFrame

The new data to be added to the generator.

required
Notes

This method appends the new data to the existing data in the generator.

Source code in xopt/generators/bayesian/bayesian_generator.py
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def add_data(self, new_data: pd.DataFrame):
    """
    Append new rows to the data held by the generator.

    Parameters
    ----------
    new_data : pd.DataFrame
        The new data to be added to the generator.

    Notes
    -----
    The stored data and `new_data` are concatenated row-wise and the index of
    the result is renumbered; the combined frame replaces `self.data`.
    """
    frames = [self.data, new_data]
    combined = pd.concat(frames, axis=0, ignore_index=True)
    self.data = combined

generate(n_candidates)

Generate a specified number of candidate samples.

Parameters:

Name Type Description Default
n_candidates int

The number of candidate samples to generate.

required

Returns:

Type Description
List[Dict]

A list of dictionaries containing the generated samples.

Source code in xopt/generators/bayesian/bax_generator.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def generate(self, n_candidates: int) -> List[Dict]:
    """
    Generate a specified number of candidate samples.

    The internal call counter is incremented before the request is handed to
    the parent implementation.

    Parameters
    ----------
    n_candidates : int
        The number of candidate samples to generate.

    Returns
    -------
    List[Dict]
        A list of dictionaries containing the generated samples.
    """
    self._n_calls = self._n_calls + 1
    candidates = super().generate(n_candidates)
    return candidates

get_acquisition(model)

Define the acquisition function based on the given GP model.

Lives on target device specified by tkwargs / use_cuda.

Parameters:

Name Type Description Default
model Module

The BoTorch model to be used for generating the acquisition function.

required

Returns:

Name Type Description
acquisition_function AcquisitionFunction

Raises:

Type Description
ValueError

If the provided 'model' is None. A valid model is required to create the acquisition function.

Source code in xopt/generators/bayesian/bayesian_generator.py
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
def get_acquisition(self, model: Module) -> AcquisitionFunction:
    """
    Build the acquisition function for the given GP model.

    The result is moved to the device / dtype given by `self.tkwargs`.

    Parameters
    ----------
    model : Module
        The BoTorch model to be used for generating the acquisition function.

    Returns
    -------
    acquisition_function : AcquisitionFunction
        The base acquisition function, wrapped for constraints (when the vocs
        defines any) and for fixed features.

    Raises
    ------
    ValueError
        If the provided 'model' is None. A valid model is required to create the
        acquisition function.
    """
    if model is None:
        raise ValueError("model cannot be None")

    # start from the generator-specific acquisition function
    acquisition = self._get_acquisition(model)

    # wrap with constraint handling when the vocs specifies constraints
    # TODO: replace with direct constrained acquisition function calls
    # see SampleReducingMCAcquisitionFunction in botorch for rationale
    if len(self.vocs.constraints) > 0:
        # reuse the sampler of the base acquisition function when it has one
        try:
            mc_sampler = acquisition.sampler
        except AttributeError:
            mc_sampler = self._get_sampler(model)

        constrained = ConstrainedMCAcquisitionFunction(
            model,
            acquisition,
            self._get_constraint_callables(),
            sampler=mc_sampler,
        )

        # log transform the result to handle the constraints
        acquisition = LogAcquisitionFunction(constrained)

    with_fixed_features = self._apply_fixed_features(acquisition)
    return with_fixed_features.to(**self.tkwargs)

get_input_data(data)

Convert input data to a torch tensor.

Parameters:

Name Type Description Default
data DataFrame

The input data in the form of a pandas DataFrame.

required

Returns:

Type Description
Tensor

A torch tensor containing the input data.

Notes

This method takes a pandas DataFrame as input data and converts it into a torch tensor. It specifically selects columns corresponding to the model's input names (variables), and the resulting tensor is configured with the data type and device settings from the generator.

Source code in xopt/generators/bayesian/bayesian_generator.py
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
def get_input_data(self, data: pd.DataFrame) -> torch.Tensor:
    """
    Build a torch tensor from the model-input columns of a DataFrame.

    Parameters
    ----------
    data : pd.DataFrame
        The input data in the form of a pandas DataFrame.

    Returns
    -------
    torch.Tensor
        A torch tensor containing the input data.

    Notes
    -----
    Only the columns named in `self.model_input_names` are used, in that
    order. Their values are copied into a NumPy array first, and the tensor is
    created with the keyword settings stored in `self.tkwargs`.
    """
    input_columns = data[self.model_input_names]
    values = input_columns.to_numpy().copy()
    return torch.tensor(values, **self.tkwargs)

get_optimum()

select the best point(s) given by the model using the Posterior mean

Source code in xopt/generators/bayesian/bayesian_generator.py
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
def get_optimum(self):
    """
    Select the best point(s) given by the model using the posterior mean.

    A `qUpperConfidenceBound` acquisition function with ``beta=0.0`` is built
    on the current model, wrapped for constraints when the vocs defines any,
    and optimized for a single candidate.

    Returns
    -------
    The output of `self._process_candidates` applied to the optimizer result.
    """
    mean_acq = qUpperConfidenceBound(
        model=self.model, beta=0.0, objective=self._get_objective()
    )
    if len(self.vocs.constraints) > 0:
        mean_acq = ConstrainedMCAcquisitionFunction(
            self.model,
            mean_acq,
            self._get_constraint_callables(),
            sampler=self._get_sampler(self.model),
        )
    search_bounds = self._get_bounds()

    if self.fixed_features is not None:
        mean_acq = self._apply_fixed_features(mean_acq)

        # keep only the bound columns of variables that are not held fixed
        free_columns = [
            position
            for position, name in enumerate(self.vocs.variable_names)
            if name not in self.fixed_features
        ]
        search_bounds = search_bounds[:, free_columns]

    search_bounds = search_bounds.to(**self.tkwargs)
    mean_acq = mean_acq.to(**self.tkwargs)

    # use default initial conditions for a global search
    best = self.numerical_optimizer.optimize(mean_acq, search_bounds, 1)

    return self._process_candidates(best)

get_training_data(data)

Get training data used to train the GP model.

If a turbo controller is specified with the flag restrict_model_data this will return a subset of data that is inside the trust region.

Parameters:

Name Type Description Default
data DataFrame

The data in the form of a pandas DataFrame.

required

Returns:

Name Type Description
data DataFrame

A subset of the data used to train the model, in the form of a pandas DataFrame.

Source code in xopt/generators/bayesian/bayesian_generator.py
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
def get_training_data(self, data: pd.DataFrame) -> pd.DataFrame:
    """
    Get training data used to train the GP model.

    If a turbo controller is specified with the flag `restrict_model_data` this
    will return a subset of data that is inside the trust region. Otherwise the
    input is returned unchanged.

    Parameters
    ----------
    data : pd.DataFrame
        The data in the form of a pandas DataFrame.

    Returns
    -------
    data : pd.DataFrame
        The subset of data used to train the model, in the form of a pandas
        DataFrame.

    Raises
    ------
    FeasibilityError
        If the data is restricted to the trust region and no points of the
        dataset lie inside it.
    """
    controller = self.turbo_controller
    if controller is None or not controller.restrict_model_data:
        return data

    data = controller.get_data_in_trust_region(data, self)
    if data.empty:
        # Adjacent string literals form ONE message. Separating them with
        # commas (as before) passed two positional arguments, so the exception
        # rendered as a tuple of fragments instead of a readable sentence.
        raise FeasibilityError(
            "No training data available to build model, because "
            "no points in the dataset are within the TuRBO trust region."
        )
    return data

model_dump(*args, **kwargs)

overwrite model dump to remove faux class attrs

Source code in xopt/generator.py
152
153
154
155
156
157
158
159
160
def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
    """
    Overwrite model dump to remove faux class attrs.

    All arguments are forwarded to the parent `model_dump`; the keys
    ``supports_batch_generation`` and ``supports_multi_objective`` are then
    dropped from the result if they are present.
    """
    dumped = super().model_dump(*args, **kwargs)

    for faux_attr in ("supports_batch_generation", "supports_multi_objective"):
        dumped.pop(faux_attr, None)

    return dumped

propose_candidates(model, n_candidates=1)

Propose candidates using Bayesian Optimization.

Parameters:

Name Type Description Default
model Module

The trained Bayesian model.

required
n_candidates int

The number of candidates to propose (default is 1).

1

Returns:

Type Description
Tensor

A tensor containing the proposed candidates.

Notes

This method proposes candidates for Bayesian Optimization by numerically optimizing the acquisition function using the trained model. It updates the state of the Turbo controller if used and calculates the optimization bounds.

Source code in xopt/generators/bayesian/bayesian_generator.py
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
def propose_candidates(self, model: Module, n_candidates: int = 1) -> Tensor:
    """
    Propose candidates using Bayesian Optimization.

    Parameters
    ----------
    model : Module
        The trained Bayesian model.
    n_candidates : int, optional
        The number of candidates to propose (default is 1).

    Returns
    -------
    Tensor
        A tensor containing the proposed candidates.

    Notes
    -----
    The steps run in this order: update the Turbo controller state (if one is
    used), compute the optimization bounds, build the acquisition function,
    fetch initial conditions, and numerically optimize the acquisition
    function.
    """
    # update TurBO state if used with the last `n_candidates` points
    controller = self.turbo_controller
    if controller is not None:
        controller.update_state(self, n_candidates)

    search_bounds = self._get_optimization_bounds()
    acquisition = self.get_acquisition(model)

    # starting points for the acquisition function optimization
    start_points = self._get_initial_conditions(n_candidates)

    optimizer = self.numerical_optimizer
    if isinstance(optimizer, GridOptimizer):
        # grid optimizer does not support batch_initial_conditions
        return optimizer.optimize(acquisition, search_bounds, n_candidates)

    return optimizer.optimize(
        acquisition,
        search_bounds,
        n_candidates,
        batch_initial_conditions=start_points,
    )

train_model(data=None, update_internal=True)

Train a Bayesian model for Bayesian Optimization.

Parameters:

Name Type Description Default
data DataFrame

The data to be used for training the model. If not provided, the internal data of the generator is used.

None
update_internal bool

Flag to indicate whether to update the internal model of the generator with the trained model (default is True).

True

Returns:

Type Description
Module

The trained Bayesian model.

Raises:

Type Description
ValueError

If no data is available to build the model.

Notes

This method trains a Bayesian model using the provided data or the internal data of the generator. It updates the internal model with the trained model if the 'update_internal' flag is set to True.

Source code in xopt/generators/bayesian/bayesian_generator.py
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
def train_model(
    self, data: pd.DataFrame | None = None, update_internal: bool = True
) -> Module:
    """
    Train a Bayesian model for Bayesian Optimization.

    Parameters
    ----------
    data : pd.DataFrame, optional
        The data to be used for training the model. If not provided, the internal
        data of the generator is used.
    update_internal : bool, optional
        Flag to indicate whether to update the internal model of the generator
        with the trained model (default is True).

    Returns
    -------
    Module
        The trained Bayesian model.

    Raises
    ------
    ValueError
        If no data is available to build the model.

    Notes
    -----
    This method trains a Bayesian model using the provided data or the internal
    data of the generator. It updates the internal model with the trained model
    if the 'update_internal' flag is set to True.
    """
    if data is None:
        data = self.get_training_data(self.data)
        if data is None:
            raise ValueError("no data available to build model")

    if data.empty:
        raise ValueError("no data available to build model")

    # get input bounds
    variable_bounds = {
        name: ele.domain for name, ele in self.vocs.variables.items()
    }

    # if turbo restrict points is true then set the bounds to the trust region
    # bounds
    if self.turbo_controller is not None:
        if self.turbo_controller.restrict_model_data:
            variable_bounds = dict(
                zip(
                    self.vocs.variable_names,
                    self.turbo_controller.get_trust_region(self).numpy().T,
                )
            )

    # add fixed feature bounds if requested
    if self.fixed_features is not None:
        # get bounds for each fixed_feature (vocs bounds take precedent)
        for key in self.fixed_features:
            if key not in variable_bounds:
                if key not in data:
                    raise KeyError(
                        "generator data needs to contain fixed feature "
                        f"column name `{key}`"
                    )
                f_data = data[key]
                bounds = [f_data.min(), f_data.max()]
                if bounds[1] - bounds[0] < 1e-8:
                    bounds[1] = bounds[0] + 1e-8
                variable_bounds[key] = bounds

    _model = self.gp_constructor.build_model(
        self.model_input_names,
        self.vocs.output_names,
        data,
        {name: variable_bounds[name] for name in self.model_input_names},
        **self.tkwargs,
    )

    if update_internal:
        self.model = _model

    return _model

validate_turbo_controller(value, info) classmethod

note default behavior is no use of turbo

Source code in xopt/generators/bayesian/bayesian_generator.py
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
@field_validator("turbo_controller", mode="before")
@classmethod
def validate_turbo_controller(cls, value: Any, info: ValidationInfo):
    """
    Validate the turbo controller; a value of None (no turbo) is the default
    and is returned as-is.

    Raises
    ------
    ValueError
        If a controller is given but the generator lists no compatible turbo
        controllers.
    """
    if value is None:
        return value

    # ignore None placeholders in the list of compatible controllers
    candidates = []
    for controller_type in cls.get_compatible_turbo_controllers():
        if controller_type is not None:
            candidates.append(controller_type)

    if not candidates:
        raise ValueError("no turbo controllers are compatible with this generator")

    return validate_turbo_controller_base(value, candidates, info)

visualize_model(**kwargs)

Display GP model predictions for the selected output(s).

The GP models are displayed with respect to the named variables. If None are given, the list of variables in vocs is used. Feasible samples are indicated with a filled orange "o", infeasible samples with a hollow red "o". Feasibility is calculated with respect to all constraints unless the selected output is a constraint itself, in which case only that one is considered.

Parameters:

Name Type Description Default
**kwargs

Supported keyword arguments: - output_names : List[str] Outputs for which the GP models are displayed. Defaults to all outputs in vocs. - variable_names : List[str] The variables with respect to which the GP models are displayed (maximum of 2). Defaults to vocs.variable_names. - idx : int Index of the last sample to use. This also selects the point of reference in higher dimensions unless an explicit reference_point is given. - reference_point : dict Reference point determining the value of variables in vocs.variable_names, but not in variable_names (slice plots in higher dimensions). Defaults to last used sample. - show_samples : bool, optional Whether samples are shown. - show_prior_mean : bool, optional Whether the prior mean is shown. - show_feasibility : bool, optional Whether the feasibility region is shown. - show_acquisition : bool, optional Whether the acquisition function is computed and shown (only if acquisition function is not None). - n_grid : int, optional Number of grid points per dimension used to display the model predictions. - axes : Axes, optional Axes object used for plotting. - exponentiate : bool, optional Flag to exponentiate acquisition function before plotting.

{}

Returns:

Name Type Description
result tuple

The matplotlib figure and axes objects.

Source code in xopt/generators/bayesian/bayesian_generator.py
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
def visualize_model(self, **kwargs):
    """
    Display GP model predictions for the selected output(s).

    The GP models are displayed with respect to the named variables. If None
    are given, the list of variables in vocs is used. Feasible samples are
    indicated with a filled orange "o", infeasible samples with a hollow red
    "o". Feasibility is calculated with respect to all constraints unless the
    selected output is a constraint itself, in which case only that one is
    considered.

    Parameters
    ----------
    **kwargs : dict, optional
        Forwarded to `visualize_generator_model`. Supported keyword arguments:

        - output_names : List[str]
            Outputs for which the GP models are displayed. Defaults to all
            outputs in vocs.
        - variable_names : List[str]
            The variables with respect to which the GP models are displayed
            (maximum of 2). Defaults to vocs.variable_names.
        - idx : int
            Index of the last sample to use. This also selects the point of
            reference in higher dimensions unless an explicit reference_point
            is given.
        - reference_point : dict
            Reference point determining the value of variables in
            vocs.variable_names, but not in variable_names (slice plots in
            higher dimensions). Defaults to last used sample.
        - show_samples : bool, optional
            Whether samples are shown.
        - show_prior_mean : bool, optional
            Whether the prior mean is shown.
        - show_feasibility : bool, optional
            Whether the feasibility region is shown.
        - show_acquisition : bool, optional
            Whether the acquisition function is computed and shown (only if
            acquisition function is not None).
        - n_grid : int, optional
            Number of grid points per dimension used to display the model
            predictions.
        - axes : Axes, optional
            Axes object used for plotting.
        - exponentiate : bool, optional
            Flag to exponentiate acquisition function before plotting.

    Returns
    -------
    result : tuple
        The matplotlib figure and axes objects.
    """
    figure_and_axes = visualize_generator_model(self, **kwargs)
    return figure_and_axes

yaml(**kwargs)

serialize first then dump to yaml string

Source code in xopt/pydantic.py
231
232
233
234
235
236
237
238
def yaml(self, **kwargs):
    """
    Serialize to JSON first, then dump the result as a YAML string.

    Parameters
    ----------
    **kwargs
        Keyword arguments forwarded to `self.to_json`.

    Returns
    -------
    The output of `yaml.dump` applied to the parsed JSON content.
    """
    json_text = self.to_json(**kwargs)
    plain_content = json.loads(json_text)
    return yaml.dump(plain_content)