mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
handling relative paths for autogluon
This commit is contained in:
parent
67fd184ac5
commit
2ff57a83ed
5 changed files with 43 additions and 22 deletions
|
|
@ -122,17 +122,15 @@ class AutogluonModel:
|
|||
|
||||
return metrics_df
|
||||
|
||||
def optimise_model_for_deployment(self, deployment_path: Path = None) -> None:
|
||||
def optimise_model_for_deployment(self, deployment_path: Path = None) -> str:
|
||||
"""
|
||||
We can optimise the deployment for a autogluon model
|
||||
"""
|
||||
if self.model is None:
|
||||
logger.error("No model to optimise for deployment")
|
||||
exit(1)
|
||||
raise ValueError("No model to optimise for deployment")
|
||||
|
||||
if deployment_path is None:
|
||||
logger.error("Deployment path required")
|
||||
exit(1)
|
||||
raise ValueError("Deployment path required")
|
||||
|
||||
# This will return a string path of the location
|
||||
return self.model.clone_for_deployment(deployment_path)
|
||||
|
|
|
|||
17
model_data/simulation_system/core/Helpers.py
Normal file
17
model_data/simulation_system/core/Helpers.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
from pathlib import Path
|
||||
|
||||
|
||||
def ensure_relative_path(file_path: str, relative_to: str | Path = None) -> Path:
|
||||
"""
|
||||
Convert the given path to a relative path.
|
||||
|
||||
:param file_path: The path to check and possibly convert.
|
||||
:param relative_to: Optional path to which the given path should be made relative.
|
||||
If not provided, the current working directory is used.
|
||||
:return: The relative path.
|
||||
"""
|
||||
path = Path(file_path)
|
||||
if path.is_absolute():
|
||||
base_path = Path(relative_to) if relative_to else Path.cwd()
|
||||
return path.relative_to(base_path)
|
||||
return path
|
||||
|
|
@ -4,14 +4,13 @@ Script to load MLModel class and generate predictions
|
|||
|
||||
import json
|
||||
import argparse
|
||||
from MLModel.Models import AutogluonModel
|
||||
from core.Logger import logger
|
||||
from core.DataLoader import DataLoader
|
||||
from pathlib import Path
|
||||
from model_data.simulation_system.MLModel.Models import AutogluonModel
|
||||
from model_data.simulation_system.core.Logger import logger
|
||||
from model_data.simulation_system.core.DataLoader import DataLoader
|
||||
import pandas as pd
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from core.Settings import (
|
||||
from model_data.simulation_system.core.Settings import (
|
||||
BASE_REGISTRY_PATH,
|
||||
REGISTRY_FILE,
|
||||
PREDICTION_LOCATION,
|
||||
|
|
@ -19,10 +18,12 @@ from core.Settings import (
|
|||
METADATA_FILE
|
||||
)
|
||||
|
||||
TIMESTAMP = datetime.now().strftime(format="%Y-%m-%d_%H-%M-%S")
|
||||
TIMESTAMP = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
|
||||
# FOR TESTING
|
||||
# For now just loading data first and then passing into function (i.e. as if we receive json data and convert to DataFrame)
|
||||
# For now just loading data first and then passing into function (i.e. as if we receive json data and convert to
|
||||
# DataFrame)
|
||||
# TEST_DATA = DataLoader.load(filepath="../simulation_system/model_build_data/change_data/rdsap_full/test_data.parquet")
|
||||
# DATA = TEST_DATA.sample(1)
|
||||
|
||||
|
|
@ -33,18 +34,20 @@ def ingest_arguments() -> argparse.Namespace:
|
|||
"""
|
||||
|
||||
parser = argparse.ArgumentParser(description='Inputs for training script')
|
||||
parser.add_argument('--target-column', type=str, help='The response variable you are predicting for', choices=['RDSAP_CHANGE', 'HEAT_DEMAND_CHANGE'], default='RDSAP_CHANGE')
|
||||
parser.add_argument('--model-path', type=str, help='If you wish to use a specific model, specify the model path here')
|
||||
parser.add_argument('--target-column', type=str, help='The response variable you are predicting for',
|
||||
choices=['RDSAP_CHANGE', 'HEAT_DEMAND_CHANGE'], default='RDSAP_CHANGE')
|
||||
parser.add_argument('--model-path', type=str,
|
||||
help='If you wish to use a specific model, specify the model path here')
|
||||
parser.add_argument('--data', type=str, help='Json data for predictions')
|
||||
parser.add_argument('--data-path', type=str, help='Location of Parquet dataset to load for training')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
|
||||
|
||||
|
||||
def prediction(target_column: str = "RDSAP_CHANGE", model_path: str = None, data: pd.DataFrame = None, data_path: Optional[str] = None):
|
||||
def prediction(target_column: str = "RDSAP_CHANGE", model_path: str = None, data: pd.DataFrame = None,
|
||||
data_path: Optional[str] = None):
|
||||
"""
|
||||
Main pipeline function
|
||||
"""
|
||||
|
|
@ -93,6 +96,7 @@ def prediction(target_column: str = "RDSAP_CHANGE", model_path: str = None, data
|
|||
|
||||
logger.info("--- Loading Model ---")
|
||||
model = AutogluonModel()
|
||||
|
||||
model.load_model(filepath=model_location)
|
||||
|
||||
logger.info("--- Generating Predictions ---")
|
||||
|
|
@ -125,10 +129,11 @@ def prediction(target_column: str = "RDSAP_CHANGE", model_path: str = None, data
|
|||
|
||||
return json_prediction
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = ingest_arguments()
|
||||
|
||||
# Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}'
|
||||
# Data path can be passed as so: python3 predictions.py --data-path ../simulation_system/model_build_data/change_data/rdsap_full/test_data.parquet
|
||||
prediction(target_column=args.target_column, model_path=args.model_path, data=args.data, data_path=args.data_path)
|
||||
# Data path can be passed as so: python3 predictions.py --data-path
|
||||
# ../simulation_system/model_build_data/change_data/rdsap_full/test_data.parquet
|
||||
prediction(target_column=args.target_column, model_path=args.model_path, data=args.data, data_path=args.data_path)
|
||||
|
|
|
|||
0
model_data/simulation_system/requirements/prediction.txt
Normal file
0
model_data/simulation_system/requirements/prediction.txt
Normal file
|
|
@ -1,16 +1,13 @@
|
|||
import argparse
|
||||
# import boto3
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from model_data.simulation_system.core.Logger import logger
|
||||
from model_data.simulation_system.core.DataLoader import DataLoader
|
||||
from model_data.simulation_system.core.FeatureProcessor import FeatureProcessor
|
||||
from model_data.simulation_system.MLModel.Models import AutogluonModel
|
||||
import pandas as pd
|
||||
from model_data.simulation_system.core.Settings import (
|
||||
MODEL_DIRECTORY,
|
||||
BASE_REGISTRY_PATH,
|
||||
REGISTRY_FILE,
|
||||
MODEL_FOLDER,
|
||||
|
|
@ -19,6 +16,7 @@ from model_data.simulation_system.core.Settings import (
|
|||
SUBSAMPLE_FACTOR,
|
||||
MODEL_HYPERPARAMETERS
|
||||
)
|
||||
from model_data.simulation_system.core.Helpers import ensure_relative_path
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
|
@ -159,6 +157,9 @@ def training(
|
|||
logger.info("--- Optimising model for deployment ---")
|
||||
|
||||
deployment_model_path = model.optimise_model_for_deployment(deployment_path=output_base / DEPLOYMENT_FOLDER)
|
||||
# Autogluon requires models to be stored at relative paths. This will likely eventually be s3 however we
|
||||
# make sure the path is relative to the location of this script
|
||||
deployment_model_path = ensure_relative_path(deployment_model_path, Path(__file__).parent)
|
||||
logger.info(f"Optimised version of best model can be found at: {deployment_model_path}")
|
||||
|
||||
# TODO: Need a model registry - for now have this as a CSV
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue