diff --git a/.gitignore b/.gitignore index cb17846e..be9da3aa 100644 --- a/.gitignore +++ b/.gitignore @@ -252,6 +252,7 @@ backend/.idea open_uprn/.idea/ conservation_areas/.idea/ model_data/.idea/ +model_data/simulation_system/.idea/ model_data/simulation_system/data* diff --git a/model_data/simulation_system/MLModel/Models.py b/model_data/simulation_system/MLModel/Models.py index 137f2f20..89bbe762 100644 --- a/model_data/simulation_system/MLModel/Models.py +++ b/model_data/simulation_system/MLModel/Models.py @@ -13,15 +13,17 @@ from pathlib import Path import pandas as pd from autogluon.tabular import TabularDataset, TabularPredictor from sklearn.metrics import mean_absolute_percentage_error -from core.Logger import logger +from model_data.simulation_system.core.Logger import logger AUTOGLUON_HYPERPARAMETERS = ['problem_type', 'eval_metric', 'time_limit', 'presets', 'excluded_model_types'] METRIC_FILENAME = "metrics.csv" + class AutogluonModel: """ Autogluon model that implements the MLModel Protocol """ + def __init__(self, output_filepath: Path = None) -> None: self.model = None self.output_filepath = output_filepath @@ -40,10 +42,10 @@ class AutogluonModel: logger.info("Using AutoGluon Model - Model saving already occured") def train_model( - self, - data: pd.DataFrame, - target_column: str, - hyperparameters: dict = None) -> None: + self, + data: pd.DataFrame, + target_column: str, + hyperparameters: dict = None) -> None: """ For the given data and hyperparameters, a model is trained """ @@ -58,17 +60,16 @@ class AutogluonModel: AGdata = TabularDataset(data=data) self.model = TabularPredictor( - label=target_column, - path=self.output_filepath, + label=target_column, + path=self.output_filepath, problem_type=hyperparameters['problem_type'], eval_metric=hyperparameters['eval_metric'] - ).fit( - AGdata, - time_limit=hyperparameters['time_limit'], - presets=hyperparameters['presets'], + ).fit( + AGdata, + time_limit=hyperparameters['time_limit'], + presets=hyperparameters['presets'], excluded_model_types=hyperparameters['excluded_model_types'] - ) - + ) def generate_predictions(self, data: pd.DataFrame) -> pd.DataFrame: """ @@ -84,12 +85,12 @@ class AutogluonModel: return predictions def model_evaluation( - self, - validation_data: pd.DataFrame, - target_column: str, - metrics_location: Path = None, - metric_filename: str = METRIC_FILENAME - ) -> pd.DataFrame: + self, + validation_data: pd.DataFrame, + target_column: str, + metrics_location: Path = None, + metric_filename: str = METRIC_FILENAME + ) -> pd.DataFrame: """ For any validation data, a set of predictions and metrics are return """ @@ -105,7 +106,7 @@ class AutogluonModel: logger.info("Prediction used for evaluations are saved in self.prediction") self.predictions = predictions - + # TODO: Can have a custom metric class that defines all different metrics we want metric_mape = mean_absolute_percentage_error(validation_data[target_column], predictions) @@ -117,7 +118,7 @@ class AutogluonModel: metrics_df = pd.DataFrame([performance]) metrics_df.to_csv(metrics_location / metric_filename) markdown_filename = metric_filename.split(".")[0] + ".md" - metrics_df.to_markdown(metrics_location/ markdown_filename) + metrics_df.to_markdown(metrics_location / markdown_filename) return metrics_df @@ -135,8 +136,3 @@ class AutogluonModel: # This will return a string path of the location return self.model.clone_for_deployment(deployment_path) - - - - - \ No newline at end of file diff --git a/model_data/simulation_system/training.py b/model_data/simulation_system/training.py index d41e6c56..561d1e1d 100644 --- a/model_data/simulation_system/training.py +++ b/model_data/simulation_system/training.py @@ -7,7 +7,7 @@ from typing import List from model_data.simulation_system.core.Logger import logger from model_data.simulation_system.core.DataLoader import DataLoader from model_data.simulation_system.core.FeatureProcessor import FeatureProcessor -from MLModel.Models import AutogluonModel +from model_data.simulation_system.MLModel.Models import AutogluonModel import pandas as pd from model_data.simulation_system.core.Settings import ( MODEL_DIRECTORY,