Model/model_data/simulation_system/MLModel/Models.py
2023-08-25 15:25:35 +01:00

138 lines
4.6 KiB
Python

"""
Different implementations of the MLModel Protocol
Uses the BaseMLModel protocol
Key tasks:
- Template Model class for different model types
- Save model
- Load Model
- Generate Inference
"""
from typing import NamedTuple
from pathlib import Path
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.metrics import mean_absolute_percentage_error
from model_data.simulation_system.core.Logger import logger
# Keys that `AutogluonModel.train_model` requires (exactly) in its `hyperparameters` dict.
AUTOGLUON_HYPERPARAMETERS = ['problem_type', 'eval_metric', 'time_limit', 'presets', 'excluded_model_types']
# Default file name used by `AutogluonModel.model_evaluation` for the CSV metrics file.
METRIC_FILENAME = "metrics.csv"


class AutogluonModel:
    """
    Autogluon model that implements the MLModel Protocol.

    Wraps an AutoGluon ``TabularPredictor`` and exposes loading, training,
    prediction, evaluation and deployment-cloning. Unrecoverable usage errors
    are logged and then terminate the process via ``SystemExit(1)``.
    """

    def __init__(self, output_filepath: Path = None) -> None:
        """
        Create an empty wrapper; no predictor is loaded or trained yet.

        Args:
            output_filepath: Location handed to ``TabularPredictor`` as its
                ``path`` when training. Required by ``train_model``.
        """
        # The underlying TabularPredictor; None until load_model/train_model runs.
        self.model = None
        self.output_filepath = output_filepath
        # Predictions produced by the most recent model_evaluation call.
        self.predictions = None

    @staticmethod
    def _abort(message: str) -> None:
        """Log ``message`` as an error and terminate with exit status 1."""
        logger.error(message)
        # Raise SystemExit directly rather than calling the `exit` helper:
        # `exit` is injected by the `site` module and is not guaranteed to
        # exist, while the exception type seen by callers stays the same.
        raise SystemExit(1)

    @staticmethod
    def _markdown_filename(metric_filename: str) -> str:
        """Return ``metric_filename`` with its final suffix replaced by ``.md``."""
        # with_suffix only swaps the last extension, so names or directories
        # containing extra dots (e.g. "run.v2.csv") are not truncated.
        return str(Path(metric_filename).with_suffix(".md"))

    def load_model(self, filepath: Path) -> None:
        """
        Load a predictor from ``filepath`` via ``TabularPredictor.load`` and
        store it on ``self.model``.
        """
        self.model = TabularPredictor.load(path=filepath)

    def save_model(self, output_filepath: Path = None) -> None:
        """
        No-op apart from a log line: this method performs no saving itself.

        Args:
            output_filepath: Accepted for interface compatibility; unused.
        """
        logger.info("Using AutoGluon Model - Model saving already occured")

    def train_model(
            self,
            data: pd.DataFrame,
            target_column: str,
            hyperparameters: dict = None) -> None:
        """
        Fit a ``TabularPredictor`` on ``data`` and store it on ``self.model``.

        Args:
            data: Training data, including the target column.
            target_column: Name of the label column.
            hyperparameters: Dict whose keys must be exactly
                ``AUTOGLUON_HYPERPARAMETERS``.

        Raises:
            SystemExit: If ``self.output_filepath`` is unset, or if
                ``hyperparameters`` is missing or has the wrong key set.
        """
        if self.output_filepath is None:
            self._abort("Please specify a output_filepath in order to train a model")

        expected_keys = set(AUTOGLUON_HYPERPARAMETERS)
        # A missing dict is treated like an empty one so the caller gets the
        # same diagnostic instead of an AttributeError on None.
        provided_keys = set(hyperparameters) if hyperparameters is not None else set()
        if provided_keys != expected_keys:
            missing = sorted(str(key) for key in expected_keys - provided_keys)
            unexpected = sorted(str(key) for key in provided_keys - expected_keys)
            logger.error(f"Missing hyperparameters: {missing}; unexpected hyperparameters: {unexpected}")
            self._abort(
                "Hyperparameters (dict) is incorrectly defined - please check what hyperparameters are required"
            )

        training_data = TabularDataset(data=data)
        predictor = TabularPredictor(
            label=target_column,
            path=self.output_filepath,
            problem_type=hyperparameters['problem_type'],
            eval_metric=hyperparameters['eval_metric'],
        )
        self.model = predictor.fit(
            training_data,
            time_limit=hyperparameters['time_limit'],
            presets=hyperparameters['presets'],
            excluded_model_types=hyperparameters['excluded_model_types'],
        )

    def generate_predictions(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Return ``self.model.predict(data)`` for the already loaded/trained model.

        NOTE(review): AutoGluon documents ``predict`` as returning a pandas
        Series by default; the annotation is kept as-is for compatibility
        with the protocol - confirm which type callers expect.

        Raises:
            SystemExit: If no model has been loaded or trained.
        """
        if self.model is None:
            self._abort("No model loaded/ trained")
        return self.model.predict(data)

    def model_evaluation(
            self,
            validation_data: pd.DataFrame,
            target_column: str,
            metrics_location: Path = None,
            metric_filename: str = METRIC_FILENAME
    ) -> pd.DataFrame:
        """
        Evaluate the model on ``validation_data`` and write the metrics to disk.

        The metrics returned by ``self.model.evaluate`` are extended with a
        ``'mape'`` entry and written as both CSV (``metric_filename``) and
        markdown (same name with an ``.md`` suffix). The predictions used
        are kept on ``self.predictions``.

        Args:
            validation_data: Data containing ``target_column``.
            target_column: Name of the label column used for MAPE.
            metrics_location: Directory for the metric files; defaults to the
                current working directory when None.
            metric_filename: File name of the CSV metrics file.

        Returns:
            A single-row DataFrame holding the metrics.

        Raises:
            SystemExit: If no model has been loaded or trained.
        """
        if metrics_location is None:
            logger.warning("Metrics will be outputted to current folder")
            # Actually fall back to the current folder, as the warning says,
            # instead of failing later on `None.mkdir`.
            metrics_dir = Path.cwd()
        else:
            metrics_dir = Path(metrics_location)

        if self.model is None:
            self._abort("No model loaded/ trained - Unable to generate evaluation")

        performance = self.model.evaluate(validation_data)
        predictions = self.generate_predictions(validation_data)
        logger.info("Prediction used for evaluations are saved in self.predictions")
        self.predictions = predictions

        # TODO: Can have a custom metric class that defines all different metrics we want
        performance['mape'] = mean_absolute_percentage_error(validation_data[target_column], predictions)

        csv_path = metrics_dir / metric_filename
        markdown_path = metrics_dir / self._markdown_filename(metric_filename)
        logger.info(f"Saving metric file as {csv_path}")
        # parents=True so a nested, not-yet-existing metrics directory works.
        metrics_dir.mkdir(parents=True, exist_ok=True)

        metrics_df = pd.DataFrame([performance])
        metrics_df.to_csv(csv_path)
        metrics_df.to_markdown(markdown_path)
        return metrics_df

    def optimise_model_for_deployment(self, deployment_path: Path = None) -> str:
        """
        Clone the model for deployment via ``clone_for_deployment``.

        Args:
            deployment_path: Destination passed to ``clone_for_deployment``.

        Returns:
            The string path of the deployment location, as returned by
            ``self.model.clone_for_deployment``.

        Raises:
            SystemExit: If no model is available or no path is given.
        """
        if self.model is None:
            self._abort("No model to optimise for deployment")
        if deployment_path is None:
            self._abort("Deployment path required")
        # This will return a string path of the location
        return self.model.clone_for_deployment(deployment_path)