From 4d4e43c0489d367726bc47d35adbbc1e090c100b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 22 Jul 2025 12:45:07 +0100 Subject: [PATCH] fixed requests --- backend/app/plan/schemas.py | 4 +- backend/engine/engine.py | 69 +++++++++++++++++++++++++++- etl/find_my_epc/RetrieveFindMyEpc.py | 1 + 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 85a48a6f..a6d21ae7 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -105,8 +105,8 @@ class PlanTriggerRequest(BaseModel): # Add in optional fields which describe the format of the asset list being used - file_type: Optional[Literal["csv", "xlsx"]] = None, - file_format: Optional[Literal["domna_asset_list"]] = None, + file_type: Optional[Literal["csv", "xlsx"]] = None + file_format: Optional[Literal["domna_asset_list"]] = None sheet_name: Optional[str] = None # If one of index_start or index_end is set, the other must be set too index_start: Optional[int] = None diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 318f4a0e..6c4be199 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -4,6 +4,7 @@ from datetime import datetime from tqdm import tqdm import pandas as pd +import numpy as np from etl.epc.Record import EPCRecord from backend.SearchEpc import SearchEpc from sqlalchemy.exc import IntegrityError, OperationalError @@ -37,7 +38,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.optimiser_functions import prepare_input_measures from recommendations.Recommendations import Recommendations from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3 +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 from backend.ml_models.Valuation import PropertyValuation from etl.bill_savings.KwhData import KwhData @@ -435,7 +436,69 @@ async def 
model_engine(body: PlanTriggerRequest): try: session.begin() logger.info("Getting the inputs") - plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) + + if body.file_type == "xlsx": + plan_input = read_excel_from_s3( + bucket_name=get_settings().PLAN_TRIGGER_BUCKET, + file_key=body.trigger_file_path, + sheet_name=body.sheet_name, + header_row=0, + ) + + # We now handle the case where the input data is a Domna standardised asset list + if body.file_format == "domna_asset_list": + # We rename the columns to match the expected format + plan_input = plan_input.rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} + ) + # Where the EPC has been estimated, that is because a UPRN wasn't available and so we remove the UPRN + plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"]) + # We handle the landlord property type and built form + plan_input["property_type"] = plan_input["landlord_property_type"].copy() + plan_input["built_form"] = plan_input["landlord_built_form"].copy() + plan_input["property_type"] = np.where( + plan_input["property_type"] == "unknown", + plan_input["epc_property_type"], + plan_input["property_type"] + ) + plan_input["built_form"] = np.where( + plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"] + ) + property_type_map = { + "house": "House", + "flat": "Flat", + "maisonette": "Maisonette", + "bungalow": "Bungalow", + "block house": "House", + "coach house": "House", + "bedsit": "Flat" + } + + built_form_map = { + "mid-terrace": "Mid-Terrace", + "end-terrace": "End-Terrace", + "semi-detached": "Semi-Detached", + "detached": "Detached", + "enclosed end-terrace": "Enclosed End-Terrace", + "enclosed mid-terrace": "Enclosed Mid-Terrace", + } + # We remap the values to match the EPC expected formats + plan_input["property_type"] = 
plan_input["property_type"].map(property_type_map) + plan_input["built_form"] = plan_input["built_form"].map(built_form_map) + + plan_input = plan_input.to_dict("records") + else: + raise ValueError("Other formats not yet supported") + + else: + plan_input = read_csv_from_s3( + bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path + ) + + # We then slice it on the indexes if they are provided + if body.index_start is not None and body.index_end is not None: + plan_input = plan_input[body.index_start:body.index_end] + # Check for duplicate UPRNS input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] @@ -455,6 +518,8 @@ async def model_engine(body: PlanTriggerRequest): for config in tqdm(plan_input): # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly uprn = config.get("uprn", None) + if pd.isnull(uprn): + uprn = None if uprn: uprn = int(float(uprn)) diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 766de840..21794284 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -684,6 +684,7 @@ class RetrieveFindMyEpc: ], "Increase loft insulation to 250mm": ["loft_insulation"], "Solar photovoltaics panels, 25% of roof area": ["solar_pv"], + 'Air or ground source heat pump': ["air_source_heat_pump"], } survey = True