got the predictions working

This commit is contained in:
Khalim Conn-Kowlessar 2023-08-25 18:08:56 +01:00
parent 2ff57a83ed
commit a5062b24f0
5 changed files with 6 additions and 24 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (simulation_system)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (simulation_system_prediction)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (simulation_system)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (simulation_system_prediction)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -1,17 +0,0 @@
from pathlib import Path
def ensure_relative_path(file_path: str, relative_to: str | Path = None) -> Path:
"""
Convert the given path to a relative path.
:param file_path: The path to check and possibly convert.
:param relative_to: Optional path to which the given path should be made relative.
If not provided, the current working directory is used.
:return: The relative path.
"""
path = Path(file_path)
if path.is_absolute():
base_path = Path(relative_to) if relative_to else Path.cwd()
return path.relative_to(base_path)
return path

View file

@ -0,0 +1,2 @@
autogluon==0.8.2
pandas==1.5.3

View file

@ -8,6 +8,7 @@ from model_data.simulation_system.core.FeatureProcessor import FeatureProcessor
from model_data.simulation_system.MLModel.Models import AutogluonModel
import pandas as pd
from model_data.simulation_system.core.Settings import (
MODEL_DIRECTORY,
BASE_REGISTRY_PATH,
REGISTRY_FILE,
MODEL_FOLDER,
@ -16,7 +17,6 @@ from model_data.simulation_system.core.Settings import (
SUBSAMPLE_FACTOR,
MODEL_HYPERPARAMETERS
)
from model_data.simulation_system.core.Helpers import ensure_relative_path
import seaborn as sns
import matplotlib.pyplot as plt
@ -100,7 +100,7 @@ def training(
if model_type == "autogluon":
model_root = f"{target_column}-{hyperparameters['presets']}-{hyperparameters['time_limit']}-{TIMESTAMP}".lower()
output_base = BASE_REGISTRY_PATH / target_column / model_type / model_root
output_base = Path(MODEL_DIRECTORY) / target_column / model_type / model_root
model = AutogluonModel(
output_filepath=output_base / MODEL_FOLDER
@ -157,9 +157,6 @@ def training(
logger.info("--- Optimising model for deployment ---")
deployment_model_path = model.optimise_model_for_deployment(deployment_path=output_base / DEPLOYMENT_FOLDER)
# Autogluon requires models to be stored at relative paths. This will likely eventually be s3 however we
# make sure the path is relative to the location of this script
deployment_model_path = ensure_relative_path(deployment_model_path, Path(__file__).parent)
logger.info(f"Optimised version of best model can be found at: {deployment_model_path}")
# TODO: Need a model registry - for now have this as a CSV