From 15bcd46e24aac991dbc9bdd31c8c40970649f0f0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 16 Oct 2024 17:30:15 +0100 Subject: [PATCH] extended vectis cases and added metrics --- etl/epc/Record.py | 2 + etl/epc/generate_scenarios_data.py | 84 +++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/etl/epc/Record.py b/etl/epc/Record.py index cc70d42b..4c1a912b 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -575,6 +575,8 @@ class EPCRecord: mains_gas_map = { "Y": True, "N": False, + True: True, + False: False } self.prepared_epc["mains-gas-flag"] = ( diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py index 1714fe39..fe27cc91 100644 --- a/etl/epc/generate_scenarios_data.py +++ b/etl/epc/generate_scenarios_data.py @@ -1,5 +1,6 @@ from datetime import datetime, timezone, date import itertools +from tqdm import tqdm import pandas as pd from etl.epc.Record import EPCRecord @@ -20,6 +21,7 @@ from backend.Property import Property from recommendations.Recommendations import Recommendations from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet +from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S") @@ -1165,7 +1167,7 @@ scenario_properties = [ recommendations_scoring_data = [] -for scenario_property in scenario_properties: +for scenario_property in tqdm(scenario_properties): # We validate each record in the file. 
If the record is NOT valid, we need to handle this accordingly epc_searcher = SearchEpc( @@ -1174,22 +1176,35 @@ for scenario_property in scenario_properties: auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY, ) - epc_searcher.find_property() - # Find the epc with the same LMK key - all_epcs = epc_searcher.older_epcs.copy() - all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc]) - original_epc = [ - epc - for epc in all_epcs - if epc.get("lmk-key", None) == scenario_property.get("lmk-key") - ][0] + if scenario_property["lmk-key"] is None: + epc_records = { + "original_epc": scenario_property["epc"], + "full_sap_epc": {}, + "old_data": scenario_property["old_epcs"], + } + address = scenario_property["address"] + postcode = scenario_property["postcode"] + else: + epc_searcher.find_property() - epc_records = { - "original_epc": original_epc, - "full_sap_epc": {}, - "old_data": [], - } + # Find the epc with the same LMK key + all_epcs = epc_searcher.older_epcs.copy() + all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc]) + original_epc = [ + epc + for epc in all_epcs + if epc.get("lmk-key", None) == scenario_property.get("lmk-key") + ][0] + + epc_records = { + "original_epc": original_epc, + "full_sap_epc": {}, + "old_data": [], + } + + address = epc_searcher.address_clean + postcode = epc_searcher.postcode_clean prepared_epc = EPCRecord( epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data @@ -1197,8 +1212,8 @@ for scenario_property in scenario_properties: p = Property( id=prepared_epc.uprn, - address=epc_searcher.address_clean, - postcode=epc_searcher.postcode_clean, + address=address, + postcode=postcode, epc_record=prepared_epc, ) @@ -1220,8 +1235,10 @@ for scenario_property in scenario_properties: wall_recommendations = recommender.wall_recomender.recommendations loft_recommendations = recommender.roof_recommender.recommendations + floor_recommendations = 
recommender.floor_recommender.recommendations solar_recommendations = recommender.solar_recommender.recommendation windows_recommendations = recommender.windows_recommender.recommendation + led_recommendations = recommender.lighting_recommender.recommendation p.create_base_difference_epc_record(cleaned_lookup=cleaned) @@ -1236,8 +1253,10 @@ for scenario_property in scenario_properties: wall_recs = [] loft_recs = [] + floor_recs = [] solar_recs = [] windows_recs = [] + lighting_recs = [] if "internal_wall_insulation" in measure: for rec in wall_recommendations: @@ -1265,12 +1284,22 @@ for scenario_property in scenario_properties: if rec["type"] == "solar_pv": solar_recs.append(rec) - if "windows" in measure: + if "windows" in measure or "secondary_glazing" in measure or "double_glazing" in measure: for rec in windows_recommendations: if rec["type"] == "windows_glazing": windows_recs.append(rec) - combi_list = [wall_recs, loft_recs, solar_recs, windows_recs] + if "low_energy_lighting" in measure: + for rec in led_recommendations: + if rec["type"] == "led_lighting": + lighting_recs.append(rec) + + if "suspended_floor_insulation" in measure: + for rec in floor_recommendations: + if rec["type"] == "suspended_floor_insulation": + floor_recs.append(rec) + + combi_list = [wall_recs, loft_recs, solar_recs, windows_recs, lighting_recs, floor_recs] combi_list = [element for element in combi_list if len(element) != 0] all_combi_recommendations = list(itertools.product(*combi_list)) @@ -1331,6 +1360,23 @@ sap_impact = pd.concat( axis=1 ) sap_impact["predicted_impact"] = sap_impact["predictions"] - sap_impact["sap_starting"] +sap_impact["actual_post_sap"] = sap_impact["impact"] + sap_impact["sap_starting"] +sap_impact = sap_impact[ + [ + 'id', 'property_id', 'recommendation_id', 'phase', 'uprn', 'sap_starting', 'predictions', 'actual_post_sap', + 'impact', 'predicted_impact' + ] +].rename( + columns={"predictions": "predicted_post_sap", "impact": "actual_impact"} +) + +# Get 
some metrics - MAE and MAPE for local testing +mae = mean_absolute_error(sap_impact["actual_post_sap"], sap_impact["predicted_post_sap"]) +# mae -> 2.2958333333333347 (NOTE(review): presumably the value from a local test run - confirm) +mape = mean_absolute_percentage_error(sap_impact["actual_post_sap"], sap_impact["predicted_post_sap"]) +# mape -> 0.034359867214274246 (a fraction, not a percentage; NOTE(review): presumably from the same local run - confirm) +mape_impact = mean_absolute_percentage_error(sap_impact["actual_impact"], sap_impact["predicted_impact"]) +# mape_impact -> 0.4853675375550377 (NOTE(review): MAPE divides by |actual_impact|, so rows with zero actual impact would inflate it - confirm none exist) save_dataframe_to_s3_parquet( recommendations_scoring_data,