Skip to content

Deduplicated Generator Base

DeduplicatedGeneratorBase

Bases: Generator

Base class for generators that avoid producing duplicate candidates.

Parameters:

Name Type Description Default
deduplicate_output bool

Whether to perform deduplication on generated candidates.

True
decision_vars_seen ndarray

Array of previously seen decision variables, shape (n_seen, n_variables). If None, will be initialized on first generation.

None
Notes

Subclasses must implement the _generate method which produces candidate solutions. The base class handles the deduplication logic.

Deduplication is performed using numpy's unique function to identify and filter out duplicate decision vectors. The class maintains a history of all previously seen decision variables to ensure global uniqueness across multiple generate calls.

Source code in xopt/generators/deduplicated.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
class DeduplicatedGeneratorBase(Generator):
    """
    Generator base class that filters repeated candidates out of its output.

    Parameters
    ----------
    deduplicate_output : bool, default=True
        When False, `generate` hands back the output of `_generate` untouched.
    decision_vars_seen : numpy.ndarray, optional
        Decision variable rows recorded so far, shape (n_seen, n_variables).
        Stays None until the first deduplicated generation fills it in. A
        plain list is converted to an array during validation.

    Notes
    -----
    Child classes supply the candidates by overriding `_generate`; this class
    wraps that call in `generate` and takes care of dropping duplicates.

    Duplicates are detected with `numpy.unique` applied row-wise to the stored
    history plus each new batch, so a candidate is rejected both when it
    repeats inside a batch and when it repeats one from an earlier call.
    """

    # Toggle for the deduplication step in `generate`
    deduplicate_output: bool = True

    # History of decision variable rows (None until first populated)
    decision_vars_seen: Optional[np.ndarray] = None

    # Per-instance logger, created in `model_post_init`
    _logger: Optional[logging.Logger] = None

    def model_post_init(self, context):
        # The object id is part of the name so every instance logs separately
        logger_name = f"{__name__}.DeduplicatedGeneratorBase.{id(self)}"
        self._logger = logging.getLogger(logger_name)

    @field_validator("decision_vars_seen", mode="before")
    @classmethod
    def cast_arr(cls, value):
        # Lists become arrays; any other value is passed through unchanged
        return np.array(value) if isinstance(value, list) else value

    def generate(self, n_candidates: int) -> list[dict]:
        """
        Produce candidates, optionally rejecting already-seen ones.

        With deduplication switched off this is a plain call to `_generate`.
        With it switched on, `_generate` is called repeatedly, each time for
        the number of candidates still missing, and only candidates whose
        decision variables are not yet in `decision_vars_seen` are kept.

        Parameters
        ----------
        n_candidates : int
            Number of candidates requested.

        Returns
        -------
        list of dict
            The candidates. In deduplication mode the list is cut to at most
            `n_candidates` entries.

        Notes
        -----
        In deduplication mode `decision_vars_seen` is updated in place of the
        old history after every round, and the loop only ends once enough
        unique candidates have been collected.
        """
        start_t = time.perf_counter()
        n_removed = 0

        if self.deduplicate_output:
            candidates = []
            round_idx = 0
            while len(candidates) < n_candidates:
                # Only ask the child generator for what is still missing
                batch = self._generate(n_candidates - len(candidates))

                # Stack the batch underneath the history. The history has to
                # come first: `np.unique` reports the first occurrence of each
                # row, so a row that is already known resolves to an index
                # inside the history rather than inside the batch.
                history = self.decision_vars_seen
                n_existing_vars = 0 if history is None else history.shape[0]
                batch_vars = get_variable_data(self.vocs, batch).to_numpy()
                if history is None:
                    self.decision_vars_seen = batch_vars
                else:
                    self.decision_vars_seen = np.concatenate(
                        (history, batch_vars), axis=0
                    )

                # Collapse to unique rows and remember where each first appeared
                unique_rows, first_seen_at = np.unique(
                    self.decision_vars_seen,
                    axis=0,
                    return_index=True,
                )
                self.decision_vars_seen = unique_rows
                n_removed += n_existing_vars + len(batch) - len(first_seen_at)

                # Positions past the history belong to new rows; shift them so
                # they index into the batch
                idx = first_seen_at[first_seen_at >= n_existing_vars] - n_existing_vars
                candidates.extend(batch[i] for i in idx)

                self._logger.debug(
                    f"deduplicated generation round {round_idx} completed (n_removed={n_removed}, "
                    f"len(idx)={len(idx)}, n_existing_vars={n_existing_vars}, "
                    f"len(self.decision_vars_seen)={len(self.decision_vars_seen)})"
                )
                round_idx += 1

            # Never return more than was asked for
            candidates = candidates[:n_candidates]
        else:
            candidates = self._generate(n_candidates)

        msg = f"generated {len(candidates)} candidates in {1000 * (time.perf_counter() - start_t):.2f}ms"
        if self.deduplicate_output:
            msg += f" (removed {n_removed} duplicate individuals)"
        self._logger.debug(msg)
        return candidates

    def _generate(self, n_candidates: int) -> list[dict]:
        """
        Produce raw candidates; duplicates are allowed here.

        Subclasses override this hook with their generation mechanism. The
        base implementation only raises.

        Parameters
        ----------
        n_candidates : int
            Number of candidates to generate.

        Returns
        -------
        list of dict
            List of candidate solutions.

        Raises
        ------
        NotImplementedError
            Always, unless overridden.
        """
        raise NotImplementedError

__init__(**kwargs)

Initialize the generator.

Source code in xopt/generator.py
119
120
121
122
123
124
def __init__(self, **kwargs):
    """
    Initialize the generator.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)
    # Record which generator was constructed, using its `name` attribute
    logger.info(f"Initialized generator {self.name}")

add_data(new_data)

Update the stored dataframe with results from new evaluations.

This is intended for generators that maintain their own data.

Source code in xopt/generator.py
140
141
142
143
144
145
146
147
148
149
150
def add_data(self, new_data: pd.DataFrame):
    """
    Append newly evaluated results to the stored dataframe.

    Meant for generators that keep their own copy of the data.

    Parameters
    ----------
    new_data : pd.DataFrame
        Rows to add. When nothing is stored yet, this object itself becomes
        the stored data; otherwise it is concatenated below the existing rows
        and the index is renumbered.
    """
    if self.data is None:
        # First batch: adopt it as-is
        self.data = new_data
        return

    stacked = [self.data, new_data]
    self.data = pd.concat(stacked, axis=0, ignore_index=True)

generate(n_candidates)

Generate the unique candidates.

If deduplication is enabled, ensures all returned candidates have unique decision variables that have not been seen before.

Parameters:

Name Type Description Default
n_candidates int

Number of unique candidates to generate.

required

Returns:

Type Description
list of dict

List of candidate solutions.

Notes

When deduplication is enabled, the method may make multiple calls to the underlying _generate method if duplicates are found, until the requested number of unique candidates is obtained.

Source code in xopt/generators/deduplicated.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def generate(self, n_candidates: int) -> list[dict]:
    """
    Produce candidates, optionally rejecting already-seen ones.

    With deduplication switched off this is a plain call to `_generate`.
    With it switched on, `_generate` is called repeatedly, each time for
    the number of candidates still missing, and only candidates whose
    decision variables are not yet in `decision_vars_seen` are kept.

    Parameters
    ----------
    n_candidates : int
        Number of candidates requested.

    Returns
    -------
    list of dict
        The candidates. In deduplication mode the list is cut to at most
        `n_candidates` entries.

    Notes
    -----
    In deduplication mode `decision_vars_seen` is updated in place of the
    old history after every round, and the loop only ends once enough
    unique candidates have been collected.
    """
    start_t = time.perf_counter()
    n_removed = 0

    if self.deduplicate_output:
        candidates = []
        round_idx = 0
        while len(candidates) < n_candidates:
            # Only ask the child generator for what is still missing
            batch = self._generate(n_candidates - len(candidates))

            # Stack the batch underneath the history. The history has to
            # come first: `np.unique` reports the first occurrence of each
            # row, so a row that is already known resolves to an index
            # inside the history rather than inside the batch.
            history = self.decision_vars_seen
            n_existing_vars = 0 if history is None else history.shape[0]
            batch_vars = get_variable_data(self.vocs, batch).to_numpy()
            if history is None:
                self.decision_vars_seen = batch_vars
            else:
                self.decision_vars_seen = np.concatenate(
                    (history, batch_vars), axis=0
                )

            # Collapse to unique rows and remember where each first appeared
            unique_rows, first_seen_at = np.unique(
                self.decision_vars_seen,
                axis=0,
                return_index=True,
            )
            self.decision_vars_seen = unique_rows
            n_removed += n_existing_vars + len(batch) - len(first_seen_at)

            # Positions past the history belong to new rows; shift them so
            # they index into the batch
            idx = first_seen_at[first_seen_at >= n_existing_vars] - n_existing_vars
            candidates.extend(batch[i] for i in idx)

            self._logger.debug(
                f"deduplicated generation round {round_idx} completed (n_removed={n_removed}, "
                f"len(idx)={len(idx)}, n_existing_vars={n_existing_vars}, "
                f"len(self.decision_vars_seen)={len(self.decision_vars_seen)})"
            )
            round_idx += 1

        # Never return more than was asked for
        candidates = candidates[:n_candidates]
    else:
        candidates = self._generate(n_candidates)

    msg = f"generated {len(candidates)} candidates in {1000 * (time.perf_counter() - start_t):.2f}ms"
    if self.deduplicate_output:
        msg += f" (removed {n_removed} duplicate individuals)"
    self._logger.debug(msg)
    return candidates

model_dump(*args, **kwargs)

Override model_dump to remove faux class attributes.

Source code in xopt/generator.py
152
153
154
155
156
157
158
159
160
def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
    """
    Dump the model with the faux class attributes stripped out.

    All positional and keyword arguments are forwarded to the parent
    `model_dump`. The keys "supports_batch_generation" and
    "supports_multi_objective" are then removed from the result if present.
    """
    dumped = super().model_dump(*args, **kwargs)

    for faux_attr in ("supports_batch_generation", "supports_multi_objective"):
        # A default of None keeps this silent when the key is absent
        dumped.pop(faux_attr, None)

    return dumped

yaml(**kwargs)

Serialize to JSON first, then dump the result to a YAML string.

Source code in xopt/pydantic.py
231
232
233
234
235
236
237
238
def yaml(self, **kwargs):
    """
    Return the object as a YAML string.

    The object is first serialized with `self.to_json(**kwargs)`; that JSON
    text is parsed back into plain Python data, which is then passed to
    `yaml.dump`.
    """
    json_text = self.to_json(**kwargs)
    plain_data = json.loads(json_text)
    return yaml.dump(plain_data)