Extended vectis cases and added MAE/MAPE metrics

2026-08-02 21:08:24 +00:00 · 2024-10-16 17:30:15 +01:00 · 2024-10-16 17:30:15 +01:00 · 15bcd46e24
commit 15bcd46e24
parent 72627b5db9
2 changed files with 67 additions and 19 deletions
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@ -575,6 +575,8 @@ class EPCRecord:
        mains_gas_map = {
            "Y": True,
            "N": False,
+            True: True,
+            False: False
        }

        self.prepared_epc["mains-gas-flag"] = (
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@ -1,5 +1,6 @@
 from datetime import datetime, timezone, date
 import itertools
+from tqdm import tqdm

 import pandas as pd
 from etl.epc.Record import EPCRecord
@ -20,6 +21,7 @@ from backend.Property import Property
 from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
+from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

 now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")

@ -1165,7 +1167,7 @@ scenario_properties = [

 recommendations_scoring_data = []

-for scenario_property in scenario_properties:
+for scenario_property in tqdm(scenario_properties):
    # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly

    epc_searcher = SearchEpc(
@ -1174,22 +1176,35 @@ for scenario_property in scenario_properties:
        auth_token=get_settings().EPC_AUTH_TOKEN,
        os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
    )
-    epc_searcher.find_property()

-    # Find the epc with the same LMK key
-    all_epcs = epc_searcher.older_epcs.copy()
-    all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
-    original_epc = [
-        epc
-        for epc in all_epcs
-        if epc.get("lmk-key", None) == scenario_property.get("lmk-key")
-    ][0]
+    if scenario_property["lmk-key"] is None:
+        epc_records = {
+            "original_epc": scenario_property["epc"],
+            "full_sap_epc": {},
+            "old_data": scenario_property["old_epcs"],
+        }
+        address = scenario_property["address"]
+        postcode = scenario_property["postcode"]
+    else:
+        epc_searcher.find_property()

-    epc_records = {
-        "original_epc": original_epc,
-        "full_sap_epc": {},
-        "old_data": [],
-    }
+        # Find the epc with the same LMK key
+        all_epcs = epc_searcher.older_epcs.copy()
+        all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
+        original_epc = [
+            epc
+            for epc in all_epcs
+            if epc.get("lmk-key", None) == scenario_property.get("lmk-key")
+        ][0]
+
+        epc_records = {
+            "original_epc": original_epc,
+            "full_sap_epc": {},
+            "old_data": [],
+        }
+
+        address = epc_searcher.address_clean
+        postcode = epc_searcher.postcode_clean

    prepared_epc = EPCRecord(
        epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data
@ -1197,8 +1212,8 @@ for scenario_property in scenario_properties:

    p = Property(
        id=prepared_epc.uprn,
-        address=epc_searcher.address_clean,
-        postcode=epc_searcher.postcode_clean,
+        address=address,
+        postcode=postcode,
        epc_record=prepared_epc,
    )

@ -1220,8 +1235,10 @@ for scenario_property in scenario_properties:

    wall_recommendations = recommender.wall_recomender.recommendations
    loft_recommendations = recommender.roof_recommender.recommendations
+    floor_recommendations = recommender.floor_recommender.recommendations
    solar_recommendations = recommender.solar_recommender.recommendation
    windows_recommendations = recommender.windows_recommender.recommendation
+    led_recommendations = recommender.lighting_recommender.recommendation

    p.create_base_difference_epc_record(cleaned_lookup=cleaned)

@ -1236,8 +1253,10 @@ for scenario_property in scenario_properties:

        wall_recs = []
        loft_recs = []
+        floor_recs = []
        solar_recs = []
        windows_recs = []
+        lighting_recs = []

        if "internal_wall_insulation" in measure:
            for rec in wall_recommendations:
@ -1265,12 +1284,22 @@ for scenario_property in scenario_properties:
                if rec["type"] == "solar_pv":
                    solar_recs.append(rec)

-        if "windows" in measure:
+        if "windows" in measure or "secondary_glazing" in measure or "double_glazing" in measure:
            for rec in windows_recommendations:
                if rec["type"] == "windows_glazing":
                    windows_recs.append(rec)

-        combi_list = [wall_recs, loft_recs, solar_recs, windows_recs]
+        if "low_energy_lighting" in measure:
+            for rec in led_recommendations:
+                if rec["type"] == "led_lighting":
+                    lighting_recs.append(rec)
+
+        if "suspended_floor_insulation" in measure:
+            for rec in floor_recommendations:
+                if rec["type"] == "suspended_floor_insulation":
+                    floor_recs.append(rec)
+
+        combi_list = [wall_recs, loft_recs, solar_recs, windows_recs, lighting_recs, floor_recs]
        combi_list = [element for element in combi_list if len(element) != 0]

        all_combi_recommendations = list(itertools.product(*combi_list))
@ -1331,6 +1360,23 @@ sap_impact = pd.concat(
    axis=1
 )
 sap_impact["predicted_impact"] = sap_impact["predictions"] - sap_impact["sap_starting"]
+sap_impact["actual_post_sap"] = sap_impact["impact"] + sap_impact["sap_starting"]
+sap_impact = sap_impact[
+    [
+        'id', 'property_id', 'recommendation_id', 'phase', 'uprn', 'sap_starting', 'predictions', 'actual_post_sap',
+        'impact', 'predicted_impact'
+    ]
+].rename(
+    columns={"predictions": "predicted_post_sap", "impact": "actual_impact"}
+)
+
+# Get some metrics (MAE and MAPE) for local testing
+mae = mean_absolute_error(sap_impact["actual_post_sap"], sap_impact["predicted_post_sap"])
+# 2.2958333333333347
+mape = mean_absolute_percentage_error(sap_impact["actual_post_sap"], sap_impact["predicted_post_sap"])
+# 0.034359867214274246
+mape_impact = mean_absolute_percentage_error(sap_impact["actual_impact"], sap_impact["predicted_impact"])
+# 0.4853675375550377

 save_dataframe_to_s3_parquet(
    recommendations_scoring_data,