mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
add metrics for scenarios
This commit is contained in:
parent
9b6aeae0da
commit
c3985e2104
6 changed files with 50 additions and 12 deletions
6
.github/workflows/MLPipelinePullRequest.yml
vendored
6
.github/workflows/MLPipelinePullRequest.yml
vendored
|
|
@ -98,10 +98,14 @@ jobs:
|
|||
git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
|
||||
dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
|
||||
|
||||
echo "## Scenario metrics" >> report.md
|
||||
echo "## Scenario comparison" >> report.md
|
||||
|
||||
cat metrics/scenario_table.md >> report.md
|
||||
|
||||
echo "## Scenario metrics" >> report.md
|
||||
|
||||
cat metrics/scenario_metrics.md >> report.md
|
||||
|
||||
cml comment create report.md
|
||||
|
||||
# echo "## Residuals plot from model" >> report.md
|
||||
|
|
|
|||
|
|
@ -8,9 +8,11 @@ import os
|
|||
import pandas as pd
|
||||
from core.interface.InterfaceModels import MLModel
|
||||
from core.interface.InterfaceDataClient import DataClient
|
||||
from core.interface.InterfaceMetrics import MLMetrics
|
||||
from configs.post_prediction_logic import post_prediction_logic
|
||||
from core.DataClient import dataclient_factory
|
||||
from core.MLModels import model_factory
|
||||
from core.MLMetrics import metrics_factory
|
||||
from core.Logger import logger
|
||||
from config import settings
|
||||
|
||||
|
|
@ -30,7 +32,8 @@ model_filepath = build_model_params["model_save_filepath"]
|
|||
target = feature_process_params["feature_processor_config"]["target"]
|
||||
scenario_data_filepaths = scenarios_params["scenario_data_filepaths"]
|
||||
predictions_column_name = generate_predictions_params["predictions_column_name"]
|
||||
output_filepath = scenarios_params["output_filepath"]
|
||||
comparison_output_filepath = scenarios_params["comparison_output_filepath"]
|
||||
metrics_output_filepath = scenarios_params["metrics_output_filepath"]
|
||||
|
||||
logger.info(f"--- Initiate MLModel ---")
|
||||
|
||||
|
|
@ -51,15 +54,21 @@ output_dataclient = dataclient_factory(
|
|||
dataclient_config=client_params[output_dataclient_type],
|
||||
)
|
||||
|
||||
logger.info(f"--- Initiate MLMetrics ---")
|
||||
|
||||
metrics = metrics_factory(generate_metrics_params["metrics_type"])
|
||||
|
||||
|
||||
def generate_scenario_predictions(
|
||||
input_dataclient: DataClient,
|
||||
output_dataclient: DataClient,
|
||||
model: MLModel,
|
||||
metrics: MLMetrics,
|
||||
model_filepath: str,
|
||||
scenario_data_filepaths: list,
|
||||
predictions_column_name: str,
|
||||
output_filepath: str,
|
||||
comparison_output_filepath: str,
|
||||
metrics_output_filepath: str,
|
||||
):
|
||||
"""
|
||||
Given the new model, we generate predictions for the expected scenarios
|
||||
|
|
@ -98,16 +107,30 @@ def generate_scenario_predictions(
|
|||
scenario_data[predictions_column_name] - scenario_data["sap_starting"]
|
||||
)
|
||||
|
||||
logger.info("--- Generate Metrics ---")
|
||||
|
||||
metrics_dict = metrics.generate_metrics(
|
||||
scenario_data["impact"], scenario_data["predicted_impact"]
|
||||
)
|
||||
|
||||
metrics_df = pd.DataFrame(metrics_dict, index=[0]).T.reset_index()
|
||||
metrics_df.columns = ["metric", "value"]
|
||||
|
||||
logger.info("--- Save prediction into metrics ---")
|
||||
|
||||
output_df = scenario_data[["uprn", "id", "impact", "predicted_impact"]]
|
||||
|
||||
output_dataclient.save_data(
|
||||
obj=output_df, location=output_filepath, save_config=None
|
||||
obj=output_df, location=comparison_output_filepath, save_config=None
|
||||
)
|
||||
|
||||
output_dataclient.save_data(
|
||||
obj=metrics_df, location=metrics_output_filepath, save_config=None
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
logger.info(f"--- {__file__} - Start! ---")
|
||||
|
||||
logger.info(f"--- Generate Scenario Predictions ---")
|
||||
|
|
@ -116,10 +139,12 @@ if __name__ == "__main__":
|
|||
input_dataclient=input_dataclient,
|
||||
output_dataclient=output_dataclient,
|
||||
model=model,
|
||||
metrics=metrics,
|
||||
model_filepath=model_filepath,
|
||||
scenario_data_filepaths=scenario_data_filepaths,
|
||||
predictions_column_name=predictions_column_name,
|
||||
output_filepath=output_filepath,
|
||||
comparison_output_filepath=comparison_output_filepath,
|
||||
metrics_output_filepath=metrics_output_filepath,
|
||||
)
|
||||
|
||||
logger.info(f"--- {__file__} - Complete! ---")
|
||||
|
|
|
|||
|
|
@ -4,5 +4,7 @@ default:
|
|||
output_dataclient_type: local
|
||||
scenario_data_filepaths:
|
||||
# - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet
|
||||
- s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
|
||||
output_filepath: ./metrics/scenario_table.md
|
||||
# - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
|
||||
- s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
|
||||
comparison_output_filepath: ./metrics/scenario_table.md
|
||||
metrics_output_filepath: ./metrics/scenario_metrics.md
|
||||
|
|
|
|||
|
|
@ -208,18 +208,23 @@ stages:
|
|||
deps:
|
||||
- path: 5_generate_scenarios.py
|
||||
hash: md5
|
||||
md5: 30f80ffeb6ee50c5f7b82943a4dc7702
|
||||
size: 4014
|
||||
md5: a18f6c6ae2082f038df47386cf3e418e
|
||||
size: 4896
|
||||
params:
|
||||
configs/scenarios.yaml:
|
||||
default.scenarios:
|
||||
input_dataclient_type: aws-s3
|
||||
output_dataclient_type: local
|
||||
scenario_data_filepaths:
|
||||
- s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
|
||||
output_filepath: ./metrics/scenario_table.md
|
||||
- s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
|
||||
comparison_output_filepath: ./metrics/scenario_table.md
|
||||
metrics_output_filepath: ./metrics/scenario_metrics.md
|
||||
outs:
|
||||
- path: metrics/scenario_metrics.md
|
||||
hash: md5
|
||||
md5: 64e7db945ff655ae03c20c9845f19106
|
||||
size: 363
|
||||
- path: metrics/scenario_table.md
|
||||
hash: md5
|
||||
md5: 54856c66fca8b2ebd1fa4dea2d25734a
|
||||
md5: d4f8afe07b774374aeaa48f1b7b8a5fc
|
||||
size: 2133
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ stages:
|
|||
- default.scenarios
|
||||
outs:
|
||||
- metrics/scenario_table.md
|
||||
- metrics/scenario_metrics.md
|
||||
always_changed: true
|
||||
metrics:
|
||||
- metrics/metrics.json
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
/fit_metrics.json
|
||||
/metrics.json
|
||||
/scenario_table.md
|
||||
/scenario_metrics.md
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue