From 802da66ce919d414ad2fba0d892d34d939c6b407 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 22 Jul 2025 17:05:03 +0100 Subject: [PATCH] fixing engine api --- backend/Property.py | 4 +-- backend/app/assumptions.py | 2 ++ backend/engine/engine.py | 11 ++++++ etl/epc/Dataset.py | 50 +++++++++++++++++++++++++- etl/spatial/OpenUprnClient.py | 2 +- recommendations/WallRecommendations.py | 43 +++++++++++++--------- 6 files changed, 90 insertions(+), 22 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index a8fd925b..82c60439 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -292,9 +292,7 @@ class Property: self.epc_record, fixed_data ) - self.base_difference_record = TrainingDataset( - datasets=[difference_record], cleaned_lookup=cleaned_lookup - ) + self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup) # If we have variables that have been given to us by the landlord that we know are correct, whereas the EPC # may not be, we use them diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index d36266d3..d813e1a9 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -71,6 +71,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { }, 'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1}, "Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1}, + 'Boiler and underfloor heating, oil': {"fuel": "Oil", "cop": 0.85}, + "Boiler and radiators, smokeless fuel": {"fuel": "Smokeless Fuel", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 6c4be199..0591eed6 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -516,6 +516,14 @@ async def model_engine(body: PlanTriggerRequest): input_properties = [] for config in 
tqdm(plan_input): + + if config["landlord_property_id"] in ["LE113NWIC95", "NG241FBCT", "NG51BNIC"]: + continue + + if not pd.isnull(config.get("uprn")): + if int(float(config.get("uprn"))) < 0: + continue + # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly uprn = config.get("uprn", None) if pd.isnull(uprn): @@ -534,6 +542,9 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True) + # TODO: Placeholder + if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list": + epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED # We check for an energy assessment we have performed on this property: energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 83a85b78..5d3720fc 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -1,9 +1,16 @@ -import numpy as np import pandas as pd from typing import List from etl.epc.Record import EPCDifferenceRecord from etl.epc.ValidationConfiguration import DatasetValidationConfiguration from etl.epc.settings import EARLIEST_EPC_DATE +from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes +from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes +from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes +from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes +from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes +from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes +from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes +from etl.epc_clean.epc_attributes.MainFuelAttributes 
import MainFuelAttributes from recommendations.rdsap_tables import england_wales_age_band_lookup from recommendations.recommendation_utils import ( @@ -492,6 +499,7 @@ class TrainingDataset(BaseDataset): """ if component == "walls": + expanded_df = expanded_df[ (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"]) & ( @@ -657,6 +665,17 @@ class TrainingDataset(BaseDataset): components_to_expand = cols_to_drop.keys() + cleaning_lookup = { + "walls": WallAttributes, + "floor": FloorAttributes, + "roof": RoofAttributes, + "hotwater": HotWaterAttributes, + "mainheat": MainHeatAttributes, + "mainheatcont": MainheatControlAttributes, + "windows": WindowAttributes, + "main-fuel": MainFuelAttributes, + } + for component in components_to_expand: # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel": @@ -675,6 +694,35 @@ class TrainingDataset(BaseDataset): cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key]) + # We handle a specific edge case where we're missing information for the original description + descriptions = [x for x in self.df[left_on_starting].unique() if pd.notnull(x)] + # take any not in the cleaned lookup + missing_descriptions = [ + x for x in descriptions if x not in cleaned_lookup_df_for_key["original_description"].values + ] + if missing_descriptions: + # We handle them here + cleaner = cleaning_lookup[component] + cleaned_data = [] + for x in missing_descriptions: + desc_cleaner = cleaner(x) + cleaned = desc_cleaner.process() + cleaned_data.append( + { + "original_description": x, + "clean_description": desc_cleaner.description.replace("(assumed)", + "").rstrip().capitalize(), + **cleaned + } + ) + cleaned_lookup_df_for_key = pd.concat( + [ + cleaned_lookup_df_for_key, + pd.DataFrame(cleaned_data), + ], + ignore_index=True, + ) + expanded_df = self.df.merge( cleaned_lookup_df_for_key, how="left", diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py index 
c0cd3992..36cf2d7b 100644 --- a/etl/spatial/OpenUprnClient.py +++ b/etl/spatial/OpenUprnClient.py @@ -139,7 +139,7 @@ class OpenUprnClient: uprn_filenames = read_dataframe_from_s3_parquet( bucket_name=bucket_name, file_key="spatial/filename_meta.parquet" ) - + # If we have a domna asset list, estimated EPCs carry a simulated UPRN source, so we exclude those UPRNs from the UPRN map uprns = [p.uprn for p in input_properties if p.uprn_source != SearchEpc.UPRN_SOURCE_SIMULATED] uprn_map = cls.make_uprn_map(uprns, uprn_filenames) diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index dbb7d674..3a2815bc 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -498,24 +498,33 @@ class WallRecommendations(Definitions): Helper function to set the starting simulation config """ - simulation_config = {} - if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]: - if wall_ending_config["is_cavity_wall"]: - efficiency_data = [ - x for x in cavity_wall_energy_eff if - x["construction-age-band"] == self.property.construction_age_band - ][0] - elif wall_ending_config["internal_insulation"]: - efficiency_data = [ - x for x in iwi_energy_eff if - x["construction-age-band"] == self.property.construction_age_band - ][0] - else: - efficiency_data = [ - x for x in ewi_energy_eff if - x["construction-age-band"] == self.property.construction_age_band - ][0] + if wall_ending_config["is_cavity_wall"]: + efficiency_data = [ + x for x in cavity_wall_energy_eff if + x["construction-age-band"] == self.property.construction_age_band + ][0] + elif wall_ending_config["internal_insulation"]: + efficiency_data = [ + x for x in iwi_energy_eff if + x["construction-age-band"] == self.property.construction_age_band + ][0] + else: + efficiency_data = [ + x for x in ewi_energy_eff if + x["construction-age-band"] == self.property.construction_age_band + ][0] + if self.property.data["walls-energy-eff"] == "Good" and efficiency_data["walls-energy-eff"] not in [ + "Good", "Very Good" + ]: + 
simulation_config = { + "walls_energy_eff_ending": self.property.data["walls-energy-eff"] + } + elif self.property.data["walls-energy-eff"] == "Very Good": + simulation_config = { + "walls_energy_eff_ending": "Very Good" + } + else: simulation_config = { "walls_energy_eff_ending": efficiency_data["walls-energy-eff"] }