How to create a surrogate model class
The primary approach to create surrogate models for power systems is to strucutre them in the form of a class that can be used in the IDAES framework. This approach allows for the surrogate model to be used in the same way as other unit models where the surrogate can be assigned as a flowsheet attributute. The following example will demonstrate the creation of a surrogate model class for PV energy generation.
Surrogate modeling toolboxes
-
PySMO offers several surrogate modeling techniques, including:
-
Currently REFLO models utilize the PySMO toolbox due to its ease of use. Radial basis functions have been found to be reliable for REFLO models. The following example will demonstrate the creation of Radial Basis Function surrogate models.
Example 1: Surrogate model for PV energy generation
Import required libraries
import os
import sys
import time
import pandas as pd
from pyomo.environ import Var, Constraint, units as pyunits, value, Param
from idaes.core.solvers import get_solver
from idaes.core.surrogate.sampling.data_utils import split_training_validation
from idaes.core.surrogate.pysmo_surrogate import PysmoRBFTrainer, PysmoSurrogate
from idaes.core.surrogate.surrogate_block import SurrogateBlock
Define the surrogate model class and required parameters. The model class should contain the following:
buildmethod that defines the surrogate inputs, outputs, and their respective boundsget_training_validationmethod that loads the training and validation datacreate_surrogatemethod that creates the surrogate model, provides the training options, and saves the resulting modelload_surrogatemethod that loads the surrogate model
@declare_process_block_class("PVSurrogate")
class PVSurrogateData(SolarEnergyBaseData):
"""
Surrogate model for PV.
"""
CONFIG = SolarEnergyBaseData.CONFIG()
def build(self):
super().build()
self._tech_type = "PV"
self.design_size = Var(
initialize=1000,
bounds=[1, 200000],
units=pyunits.kW,
doc="PV design size in kW",
)
self.annual_energy = Var(
initialize=1,
units=pyunits.kWh,
doc="Annual energy produced by the plant in kWh",
)
self.land_req = Var(
initialize=7e7,
units=pyunits.acre,
doc="Land area required by the plant in acres",
)
self.surrogate_inputs = [self.design_size]
self.surrogate_outputs = [self.annual_energy, self.land_req]
self.input_labels = ["design_size"]
self.output_labels = ["annual_energy", "land_req"]
self.electricity_constraint = Constraint(
expr=self.annual_energy
== -1 * self.electricity
* pyunits.convert(1 * pyunits.year, to_units=pyunits.hour)
)
Create Training Data
def get_training_validation(self):
self.dataset_filename = os.path.join(
os.path.dirname(__file__), "data/dataset.pkl"
)
print('Loading Training Data...\n')
time_start = time.process_time()
pkl_data = pd.read_pickle(self.dataset_filename)
data = pkl_data.sample(n=int(len(pkl_data)))
self.data_training, self.data_validation = split_training_validation(
data, self.training_fraction, seed=len(data)
)
time_stop = time.process_time()
print("Data Loading Time:", time_stop - time_start, "\n")
Create Surrogate
def create_surrogate(self):
self.training_fraction = 0.8 # Fraction of the sampled data to split for training and validation
self.get_training_validation()
time_start = time.process_time()
# Create PySMO trainer object
trainer = PysmoRBFTrainer(
input_labels=self.input_labels,
output_labels=self.output_labels,
training_dataframe=self.data_training,
)
# Set PySMO options
trainer.config.basis_function = "gaussian" # default = gaussian
trainer.config.solution_method = "algebraic" # default = algebraic
trainer.config.regularization = True # default = True
# Train surrogate
rbf_train = trainer.train_surrogate()
# Create callable surrogate object
xmin, xmax = [self.design_size.bounds[0]], [self.design_size.bounds[1]]
input_bounds = {
self.input_labels[i]: (xmin[i], xmax[i]) for i in range(len(self.input_labels))
}
rbf_surr = PysmoSurrogate(rbf_train, self.input_labels, self.output_labels, input_bounds)
# Save model to JSON
if self.surrogate_file is not None:
print(f'Writing surrogate model to {self.surrogate_file}')
model = rbf_surr.save_to_file(self.surrogate_file, overwrite=True)
Load the Surrogate
def load_surrogate(self):
print('Loading surrogate file...')
self.surrogate_file = os.path.join(
os.path.dirname(__file__), "pv_surrogate.json"
)
if os.path.exists(self.surrogate_file):
self.surrogate_blk = SurrogateBlock(concrete=True)
self.surrogate = PysmoSurrogate.load_from_file(self.surrogate_file)
self.surrogate_blk.build_model(
self.surrogate,
input_vars=self.surrogate_inputs,
output_vars=self.surrogate_outputs,
)
Evaluate the surrogate: evaluate_surrogate is a built-in function of the PysmoSurrogate class. There is no need to define this function in the PVSurrogateData class, but it can be called upon to evaluate the surrogate for a given set of inputs. For reference the source code for this function is provided below.
def evaluate_surrogate(self, inputs: pd.DataFrame) -> pd.DataFrame:
"""Evaluate the surrogate model at a set of user-provided values.
Args:
inputs: The dataframe of input values to be used in the evaluation.
The dataframe needs to contain a column corresponding to each of the input labels.
Additional columns are fine, but are not used.
Returns:
output: A dataframe of the the output values evaluated at the provided inputs.
The index of the output dataframe should match the index of the provided inputs.
"""
inputdata = inputs[self._input_labels].to_numpy()
outputs = np.zeros(shape=(inputs.shape[0], len(self._output_labels)))
for i in range(inputdata.shape[0]):
row_data = inputdata[i, :].reshape(1, len(self._input_labels))
for j, output_label in enumerate(self._output_labels):
result = self._trained.get_result(output_label)
outputs[i, j] = result.model.predict_output(row_data)
return pd.DataFrame(
data=outputs, index=inputs.index, columns=self._output_labels
)
Use the surrogate
m = ConcreteModel()
m.fs = FlowsheetBlock(dynamic=False)
m.fs.pv = PVSurrogate()
m.fs.pv.create_surrogate(save=True)
m.fs.pv.load_surrogate()
results = m.fs.pv.surrogate.evaluate_surrogate(
m.fs.pv.data_validation[m.fs.pv.input_labels]
)
print(results)