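Fix engine API: skip negative-UPRN properties, clean EPC descriptions missing from the lookup, rework wall efficiency config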

2026-07-27 23:35:01 +00:00 · 2025-07-22 17:05:03 +01:00 · 2025-07-22 17:05:03 +01:00 · 802da66ce9
commit 802da66ce9
parent 4d4e43c048
6 changed files with 90 additions and 22 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -292,9 +292,7 @@ class Property:
            self.epc_record, fixed_data
        )

-        self.base_difference_record = TrainingDataset(
-            datasets=[difference_record], cleaned_lookup=cleaned_lookup
-        )
+        self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup)

        # If we have variables that have been given to us by the landlord that we know are correct, whereas the EPC
        # may not be, we use them
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@ -71,6 +71,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
    },
    'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
    "Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1},
+    'Boiler and underfloor heating, oil': {"fuel": "Oil", "cop": 0.85},
+    "Boiler and radiators, smokeless fuel": {"fuel": "Smokeless Fuel", "cop": 0.85},
 }

 # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -516,6 +516,14 @@ async def model_engine(body: PlanTriggerRequest):

        input_properties = []
        for config in tqdm(plan_input):
+
+            if config["landlord_property_id"] in ["LE113NWIC95", "NG241FBCT", "NG51BNIC"]:
+                continue
+
+            if not pd.isnull(config.get("uprn")):
+                if int(float(config.get("uprn"))) < 0:
+                    continue
+
            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
            uprn = config.get("uprn", None)
            if pd.isnull(uprn):
@ -534,6 +542,9 @@ async def model_engine(body: PlanTriggerRequest):
            epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
            # For the moment, our OS API access is unavailable, so we skip and interpolate
            epc_searcher.find_property(skip_os=True)
+            # TODO: Placeholder - for domna asset lists, flag estimated EPCs as having a simulated UPRN source
+            if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list":
+                epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED

            # We check for an energy assessment we have performed on this property:
            energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn)
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@ -1,9 +1,16 @@
-import numpy as np
 import pandas as pd
 from typing import List
 from etl.epc.Record import EPCDifferenceRecord
 from etl.epc.ValidationConfiguration import DatasetValidationConfiguration
 from etl.epc.settings import EARLIEST_EPC_DATE
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
+from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
+from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes

 from recommendations.rdsap_tables import england_wales_age_band_lookup
 from recommendations.recommendation_utils import (
@ -492,6 +499,7 @@ class TrainingDataset(BaseDataset):
        """

        if component == "walls":
+
            expanded_df = expanded_df[
                (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"])
                & (
@ -657,6 +665,17 @@ class TrainingDataset(BaseDataset):

        components_to_expand = cols_to_drop.keys()

+        cleaning_lookup = {
+            "walls": WallAttributes,
+            "floor": FloorAttributes,
+            "roof": RoofAttributes,
+            "hotwater": HotWaterAttributes,
+            "mainheat": MainHeatAttributes,
+            "mainheatcont": MainheatControlAttributes,
+            "windows": WindowAttributes,
+            "main-fuel": MainFuelAttributes,
+        }
+
        for component in components_to_expand:
            # TODO: change cleaned dataframe to have underscores instead of dashes
            if component == "main-fuel":
@ -675,6 +694,35 @@ class TrainingDataset(BaseDataset):

            cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key])

+            # We handle a specific edge case where we're missing information for the original description
+            descriptions = [x for x in self.df[left_on_starting].unique() if pd.notnull(x)]
+            # Keep only the descriptions that are not already in the cleaned lookup
+            missing_descriptions = [
+                x for x in descriptions if x not in cleaned_lookup_df_for_key["original_description"].values
+            ]
+            if missing_descriptions:
+                # We clean them on the fly with the component's attribute cleaner and add them to the lookup
+                cleaner = cleaning_lookup[component]
+                cleaned_data = []
+                for x in missing_descriptions:
+                    desc_cleaner = cleaner(x)
+                    cleaned = desc_cleaner.process()
+                    cleaned_data.append(
+                        {
+                            "original_description": x,
+                            "clean_description": desc_cleaner.description.replace("(assumed)",
+                                                                                  "").rstrip().capitalize(),
+                            **cleaned
+                        }
+                    )
+                cleaned_lookup_df_for_key = pd.concat(
+                    [
+                        cleaned_lookup_df_for_key,
+                        pd.DataFrame(cleaned_data),
+                    ],
+                    ignore_index=True,
+                )
+
            expanded_df = self.df.merge(
                cleaned_lookup_df_for_key,
                how="left",
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@ -139,7 +139,7 @@ class OpenUprnClient:
        uprn_filenames = read_dataframe_from_s3_parquet(
            bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
        )
-
+        # If we have a domna asset list, estimated EPCs have a simulated UPRN source, so we skip those UPRNs
        uprns = [p.uprn for p in input_properties if p.uprn_source != SearchEpc.UPRN_SOURCE_SIMULATED]
        uprn_map = cls.make_uprn_map(uprns, uprn_filenames)

--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@ -498,24 +498,33 @@ class WallRecommendations(Definitions):
        Helper function to set the starting simulation config
        """

-        simulation_config = {}
-        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-            if wall_ending_config["is_cavity_wall"]:
-                efficiency_data = [
-                    x for x in cavity_wall_energy_eff if
-                    x["construction-age-band"] == self.property.construction_age_band
-                ][0]
-            elif wall_ending_config["internal_insulation"]:
-                efficiency_data = [
-                    x for x in iwi_energy_eff if
-                    x["construction-age-band"] == self.property.construction_age_band
-                ][0]
-            else:
-                efficiency_data = [
-                    x for x in ewi_energy_eff if
-                    x["construction-age-band"] == self.property.construction_age_band
-                ][0]
+        if wall_ending_config["is_cavity_wall"]:
+            efficiency_data = [
+                x for x in cavity_wall_energy_eff if
+                x["construction-age-band"] == self.property.construction_age_band
+            ][0]
+        elif wall_ending_config["internal_insulation"]:
+            efficiency_data = [
+                x for x in iwi_energy_eff if
+                x["construction-age-band"] == self.property.construction_age_band
+            ][0]
+        else:
+            efficiency_data = [
+                x for x in ewi_energy_eff if
+                x["construction-age-band"] == self.property.construction_age_band
+            ][0]

+        if self.property.data["walls-energy-eff"] == "Good" and efficiency_data["walls-energy-eff"] not in [
+            "Good", "Very Good"
+        ]:
+            simulation_config = {
+                "walls_energy_eff_ending": self.property.data["walls-energy-eff"]
+            }
+        elif self.property.data["walls-energy-eff"] == "Very Good":
+            simulation_config = {
+                "walls_energy_eff_ending": "Very Good"
+            }
+        else:
            simulation_config = {
                "walls_energy_eff_ending": efficiency_data["walls-energy-eff"]
            }