From 802da66ce919d414ad2fba0d892d34d939c6b407 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 22 Jul 2025 17:05:03 +0100
Subject: [PATCH] fixing engine api

---
 backend/Property.py                    |  4 +--
 backend/app/assumptions.py             |  2 ++
 backend/engine/engine.py               | 11 ++++++
 etl/epc/Dataset.py                     | 50 +++++++++++++++++++++++++-
 etl/spatial/OpenUprnClient.py          |  2 +-
 recommendations/WallRecommendations.py | 43 +++++++++++++---------
 6 files changed, 90 insertions(+), 22 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index a8fd925b..82c60439 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -292,9 +292,7 @@ class Property:
             self.epc_record, fixed_data
         )
 
-        self.base_difference_record = TrainingDataset(
-            datasets=[difference_record], cleaned_lookup=cleaned_lookup
-        )
+        self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup)
 
         # If we have variables that have been given to us by the landlord that we know are correct, whereas the EPC
         # may not be, we use them
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index d36266d3..d813e1a9 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -71,6 +71,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     },
     'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
     "Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1},
+    'Boiler and underfloor heating, oil': {"fuel": "Oil", "cop": 0.85},
+    "Boiler and radiators, smokeless fuel": {"fuel": "Smokeless Fuel", "cop": 0.85},
 }
 
 # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index 6c4be199..0591eed6 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -516,6 +516,14 @@ async def model_engine(body: PlanTriggerRequest):
 
         input_properties = []
         for config in tqdm(plan_input):
+
+            if config["landlord_property_id"] in ["LE113NWIC95", "NG241FBCT", "NG51BNIC"]:
+                continue
+
+            if not pd.isnull(config.get("uprn")):
+                if int(float(config.get("uprn"))) < 0:
+                    continue
+
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
             uprn = config.get("uprn", None)
             if pd.isnull(uprn):
@@ -534,6 +542,9 @@ async def model_engine(body: PlanTriggerRequest):
             epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
             # For the moment, our OS API access is unavailable, so we skip and interpolate
             epc_searcher.find_property(skip_os=True)
+            # TODO: Placeholder - flag estimated EPCs from a domna asset list as having a simulated UPRN
+            if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list":
+                epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED
 
             # We check for an energy assessment we have performed on this property:
             energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn)
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 83a85b78..5d3720fc 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -1,9 +1,16 @@
-import numpy as np
 import pandas as pd
 from typing import List
 from etl.epc.Record import EPCDifferenceRecord
 from etl.epc.ValidationConfiguration import DatasetValidationConfiguration
 from etl.epc.settings import EARLIEST_EPC_DATE
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
+from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
+from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
 
 from recommendations.rdsap_tables import england_wales_age_band_lookup
 from recommendations.recommendation_utils import (
@@ -492,6 +499,7 @@ class TrainingDataset(BaseDataset):
         """
 
         if component == "walls":
+
             expanded_df = expanded_df[
                 (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"])
                 & (
@@ -657,6 +665,17 @@ class TrainingDataset(BaseDataset):
 
         components_to_expand = cols_to_drop.keys()
 
+        cleaning_lookup = {
+            "walls": WallAttributes,
+            "floor": FloorAttributes,
+            "roof": RoofAttributes,
+            "hotwater": HotWaterAttributes,
+            "mainheat": MainHeatAttributes,
+            "mainheatcont": MainheatControlAttributes,
+            "windows": WindowAttributes,
+            "main-fuel": MainFuelAttributes,
+        }
+
         for component in components_to_expand:
             # TODO: change cleaned dataframe to have underscores instead of dashes
             if component == "main-fuel":
@@ -675,6 +694,35 @@ class TrainingDataset(BaseDataset):
 
             cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key])
 
+            # Edge case: some starting descriptions in the data have no entry in the cleaned lookup
+            descriptions = [x for x in self.df[left_on_starting].unique() if pd.notnull(x)]
+            # take any not in the cleaned lookup
+            missing_descriptions = [
+                x for x in descriptions if x not in cleaned_lookup_df_for_key["original_description"].values
+            ]
+            if missing_descriptions:
+                # Clean each missing description with this component's cleaner and append it to the lookup
+                cleaner = cleaning_lookup[component]
+                cleaned_data = []
+                for x in missing_descriptions:
+                    desc_cleaner = cleaner(x)
+                    cleaned = desc_cleaner.process()
+                    cleaned_data.append(
+                        {
+                            "original_description": x,
+                            "clean_description": desc_cleaner.description.replace("(assumed)",
+                                                                                  "").rstrip().capitalize(),
+                            **cleaned
+                        }
+                    )
+                cleaned_lookup_df_for_key = pd.concat(
+                    [
+                        cleaned_lookup_df_for_key,
+                        pd.DataFrame(cleaned_data),
+                    ],
+                    ignore_index=True,
+                )
+
             expanded_df = self.df.merge(
                 cleaned_lookup_df_for_key,
                 how="left",
diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py
index c0cd3992..36cf2d7b 100644
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@@ -139,7 +139,7 @@ class OpenUprnClient:
         uprn_filenames = read_dataframe_from_s3_parquet(
             bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
         )
-
+        # Simulated UPRNs (e.g. estimated EPCs from a domna asset list) are excluded from the UPRN map lookup
         uprns = [p.uprn for p in input_properties if p.uprn_source != SearchEpc.UPRN_SOURCE_SIMULATED]
         uprn_map = cls.make_uprn_map(uprns, uprn_filenames)
 
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index dbb7d674..3a2815bc 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -498,24 +498,33 @@ class WallRecommendations(Definitions):
         Helper function to set the starting simulation config
         """
 
-        simulation_config = {}
-        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-            if wall_ending_config["is_cavity_wall"]:
-                efficiency_data = [
-                    x for x in cavity_wall_energy_eff if
-                    x["construction-age-band"] == self.property.construction_age_band
-                ][0]
-            elif wall_ending_config["internal_insulation"]:
-                efficiency_data = [
-                    x for x in iwi_energy_eff if
-                    x["construction-age-band"] == self.property.construction_age_band
-                ][0]
-            else:
-                efficiency_data = [
-                    x for x in ewi_energy_eff if
-                    x["construction-age-band"] == self.property.construction_age_band
-                ][0]
+        if wall_ending_config["is_cavity_wall"]:
+            efficiency_data = [
+                x for x in cavity_wall_energy_eff if
+                x["construction-age-band"] == self.property.construction_age_band
+            ][0]
+        elif wall_ending_config["internal_insulation"]:
+            efficiency_data = [
+                x for x in iwi_energy_eff if
+                x["construction-age-band"] == self.property.construction_age_band
+            ][0]
+        else:
+            efficiency_data = [
+                x for x in ewi_energy_eff if
+                x["construction-age-band"] == self.property.construction_age_band
+            ][0]
 
+        if self.property.data["walls-energy-eff"] == "Good" and efficiency_data["walls-energy-eff"] not in [
+            "Good", "Very Good"
+        ]:
+            simulation_config = {
+                "walls_energy_eff_ending": self.property.data["walls-energy-eff"]
+            }
+        elif self.property.data["walls-energy-eff"] == "Very Good":
+            simulation_config = {
+                "walls_energy_eff_ending": "Very Good"
+            }
+        else:
             simulation_config = {
                 "walls_energy_eff_ending": efficiency_data["walls-energy-eff"]
             }