updating imports for MlModel

This commit is contained in:
Khalim Conn-Kowlessar 2023-08-25 15:25:35 +01:00
parent 0e755626de
commit d6562bfab9
3 changed files with 24 additions and 27 deletions

1
.gitignore vendored
View file

@ -252,6 +252,7 @@ backend/.idea
open_uprn/.idea/
conservation_areas/.idea/
model_data/.idea/
model_data/simulation_system/.idea/
model_data/simulation_system/data*

View file

@ -13,15 +13,17 @@ from pathlib import Path
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.metrics import mean_absolute_percentage_error
from core.Logger import logger
from model_data.simulation_system.core.Logger import logger
AUTOGLUON_HYPERPARAMETERS = ['problem_type', 'eval_metric', 'time_limit', 'presets', 'excluded_model_types']
METRIC_FILENAME = "metrics.csv"
class AutogluonModel:
"""
Autogluon model that implements the MLModel Protocol
"""
def __init__(self, output_filepath: Path = None) -> None:
self.model = None
self.output_filepath = output_filepath
@ -40,10 +42,10 @@ class AutogluonModel:
logger.info("Using AutoGluon Model - Model saving already occured")
def train_model(
self,
data: pd.DataFrame,
target_column: str,
hyperparameters: dict = None) -> None:
self,
data: pd.DataFrame,
target_column: str,
hyperparameters: dict = None) -> None:
"""
For the given data and hyperparameters, a model is trained
"""
@ -58,17 +60,16 @@ class AutogluonModel:
AGdata = TabularDataset(data=data)
self.model = TabularPredictor(
label=target_column,
path=self.output_filepath,
label=target_column,
path=self.output_filepath,
problem_type=hyperparameters['problem_type'],
eval_metric=hyperparameters['eval_metric']
).fit(
AGdata,
time_limit=hyperparameters['time_limit'],
presets=hyperparameters['presets'],
).fit(
AGdata,
time_limit=hyperparameters['time_limit'],
presets=hyperparameters['presets'],
excluded_model_types=hyperparameters['excluded_model_types']
)
)
def generate_predictions(self, data: pd.DataFrame) -> pd.DataFrame:
"""
@ -84,12 +85,12 @@ class AutogluonModel:
return predictions
def model_evaluation(
self,
validation_data: pd.DataFrame,
target_column: str,
metrics_location: Path = None,
metric_filename: str = METRIC_FILENAME
) -> pd.DataFrame:
self,
validation_data: pd.DataFrame,
target_column: str,
metrics_location: Path = None,
metric_filename: str = METRIC_FILENAME
) -> pd.DataFrame:
"""
For any validation data, a set of predictions and metrics are return
"""
@ -105,7 +106,7 @@ class AutogluonModel:
logger.info("Prediction used for evaluations are saved in self.prediction")
self.predictions = predictions
# TODO: Can have a custom metric class that defines all different metrics we want
metric_mape = mean_absolute_percentage_error(validation_data[target_column], predictions)
@ -117,7 +118,7 @@ class AutogluonModel:
metrics_df = pd.DataFrame([performance])
metrics_df.to_csv(metrics_location / metric_filename)
markdown_filename = metric_filename.split(".")[0] + ".md"
metrics_df.to_markdown(metrics_location/ markdown_filename)
metrics_df.to_markdown(metrics_location / markdown_filename)
return metrics_df
@ -135,8 +136,3 @@ class AutogluonModel:
# This will return a string path of the location
return self.model.clone_for_deployment(deployment_path)

View file

@ -7,7 +7,7 @@ from typing import List
from model_data.simulation_system.core.Logger import logger
from model_data.simulation_system.core.DataLoader import DataLoader
from model_data.simulation_system.core.FeatureProcessor import FeatureProcessor
from MLModel.Models import AutogluonModel
from model_data.simulation_system.MLModel.Models import AutogluonModel
import pandas as pd
from model_data.simulation_system.core.Settings import (
MODEL_DIRECTORY,