add metrics for scenarios

This commit is contained in:
Michael Duong 2024-03-27 12:22:58 +00:00
parent 9b6aeae0da
commit c3985e2104
6 changed files with 50 additions and 12 deletions

View file

@ -98,10 +98,14 @@ jobs:
git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
echo "## Scenario metrics" >> report.md
echo "## Scenario comparison" >> report.md
cat metrics/scenario_table.md >> report.md
echo "## Scenario metrics" >> report.md
cat metrics/scenario_metrics.md >> report.md
cml comment create report.md
# echo "## Residuals plot from model" >> report.md

View file

@ -8,9 +8,11 @@ import os
import pandas as pd
from core.interface.InterfaceModels import MLModel
from core.interface.InterfaceDataClient import DataClient
from core.interface.InterfaceMetrics import MLMetrics
from configs.post_prediction_logic import post_prediction_logic
from core.DataClient import dataclient_factory
from core.MLModels import model_factory
from core.MLMetrics import metrics_factory
from core.Logger import logger
from config import settings
@ -30,7 +32,8 @@ model_filepath = build_model_params["model_save_filepath"]
target = feature_process_params["feature_processor_config"]["target"]
scenario_data_filepaths = scenarios_params["scenario_data_filepaths"]
predictions_column_name = generate_predictions_params["predictions_column_name"]
output_filepath = scenarios_params["output_filepath"]
comparison_output_filepath = scenarios_params["comparison_output_filepath"]
metrics_output_filepath = scenarios_params["metrics_output_filepath"]
logger.info(f"--- Initiate MLModel ---")
@ -51,15 +54,21 @@ output_dataclient = dataclient_factory(
dataclient_config=client_params[output_dataclient_type],
)
logger.info(f"--- Initiate MLMetrics ---")
metrics = metrics_factory(generate_metrics_params["metrics_type"])
def generate_scenario_predictions(
input_dataclient: DataClient,
output_dataclient: DataClient,
model: MLModel,
metrics: MLMetrics,
model_filepath: str,
scenario_data_filepaths: list,
predictions_column_name: str,
output_filepath: str,
comparison_output_filepath: str,
metrics_output_filepath: str,
):
"""
Given the new model, we generate prediction for expected scenarios
@ -98,16 +107,30 @@ def generate_scenario_predictions(
scenario_data[predictions_column_name] - scenario_data["sap_starting"]
)
logger.info("--- Generate Metrics ---")
metrics_dict = metrics.generate_metrics(
scenario_data["impact"], scenario_data["predicted_impact"]
)
metrics_df = pd.DataFrame(metrics_dict, index=[0]).T.reset_index()
metrics_df.columns = ["metric", "value"]
logger.info("--- Save prediction into metrics ---")
output_df = scenario_data[["uprn", "id", "impact", "predicted_impact"]]
output_dataclient.save_data(
obj=output_df, location=output_filepath, save_config=None
obj=output_df, location=comparison_output_filepath, save_config=None
)
output_dataclient.save_data(
obj=metrics_df, location=metrics_output_filepath, save_config=None
)
if __name__ == "__main__":
logger.info(f"--- {__file__} - Start! ---")
logger.info(f"--- Generate Scenario Predictions ---")
@ -116,10 +139,12 @@ if __name__ == "__main__":
input_dataclient=input_dataclient,
output_dataclient=output_dataclient,
model=model,
metrics=metrics,
model_filepath=model_filepath,
scenario_data_filepaths=scenario_data_filepaths,
predictions_column_name=predictions_column_name,
output_filepath=output_filepath,
comparison_output_filepath=comparison_output_filepath,
metrics_output_filepath=metrics_output_filepath,
)
logger.info(f"--- {__file__} - Complete! ---")

View file

@ -4,5 +4,7 @@ default:
output_dataclient_type: local
scenario_data_filepaths:
# - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
output_filepath: ./metrics/scenario_table.md
# - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md

View file

@ -208,18 +208,23 @@ stages:
deps:
- path: 5_generate_scenarios.py
hash: md5
md5: 30f80ffeb6ee50c5f7b82943a4dc7702
size: 4014
md5: a18f6c6ae2082f038df47386cf3e418e
size: 4896
params:
configs/scenarios.yaml:
default.scenarios:
input_dataclient_type: aws-s3
output_dataclient_type: local
scenario_data_filepaths:
- s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
output_filepath: ./metrics/scenario_table.md
- s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: 64e7db945ff655ae03c20c9845f19106
size: 363
- path: metrics/scenario_table.md
hash: md5
md5: 54856c66fca8b2ebd1fa4dea2d25734a
md5: d4f8afe07b774374aeaa48f1b7b8a5fc
size: 2133

View file

@ -80,6 +80,7 @@ stages:
- default.scenarios
outs:
- metrics/scenario_table.md
- metrics/scenario_metrics.md
always_changed: true
metrics:
- metrics/metrics.json

View file

@ -1,3 +1,4 @@
/fit_metrics.json
/metrics.json
/scenario_table.md
/scenario_metrics.md