From 4d4e43c0489d367726bc47d35adbbc1e090c100b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 22 Jul 2025 12:45:07 +0100
Subject: [PATCH] fixed requests

---
 backend/app/plan/schemas.py          |  4 +-
 backend/engine/engine.py             | 69 +++++++++++++++++++++++++++-
 etl/find_my_epc/RetrieveFindMyEpc.py |  1 +
 3 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 85a48a6f..a6d21ae7 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -105,8 +105,8 @@ class PlanTriggerRequest(BaseModel):
 
     # Add in optional fields which describe the format of the asset list being used
 
-    file_type: Optional[Literal["csv", "xlsx"]] = None,
-    file_format: Optional[Literal["domna_asset_list"]] = None,
+    file_type: Optional[Literal["csv", "xlsx"]] = None
+    file_format: Optional[Literal["domna_asset_list"]] = None
     sheet_name: Optional[str] = None
     # If one of index_start or index_end is set, the other must be set too
     index_start: Optional[int] = None
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index 318f4a0e..6c4be199 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -4,6 +4,7 @@ from datetime import datetime
 
 from tqdm import tqdm
 import pandas as pd
+import numpy as np
 from etl.epc.Record import EPCRecord
 from backend.SearchEpc import SearchEpc
 from sqlalchemy.exc import IntegrityError, OperationalError
@@ -37,7 +38,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
 from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
-from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
+from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
 from backend.ml_models.Valuation import PropertyValuation
 
 from etl.bill_savings.KwhData import KwhData
@@ -435,7 +436,69 @@ async def model_engine(body: PlanTriggerRequest):
     try:
         session.begin()
         logger.info("Getting the inputs")
-        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+
+        if body.file_type == "xlsx":
+            plan_input = read_excel_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET,
+                file_key=body.trigger_file_path,
+                sheet_name=body.sheet_name,
+                header_row=0,
+            )
+
+            # We now handle the case where the input data is a Domna standardised asset list
+            if body.file_format == "domna_asset_list":
+                # We rename the columns to match the expected format
+                plan_input = plan_input.rename(
+                    columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
+                )
+                # Where the EPC has been estimated, that is because a UPRN wasn't available and so we remove the UPRN
+                plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"])
+                # We handle the landlord property type and built form
+                plan_input["property_type"] = plan_input["landlord_property_type"].copy()
+                plan_input["built_form"] = plan_input["landlord_built_form"].copy()
+                plan_input["property_type"] = np.where(
+                    plan_input["property_type"] == "unknown",
+                    plan_input["epc_property_type"],
+                    plan_input["property_type"]
+                )
+                plan_input["built_form"] = np.where(
+                    plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"]
+                )
+                property_type_map = {
+                    "house": "House",
+                    "flat": "Flat",
+                    "maisonette": "Maisonette",
+                    "bungalow": "Bungalow",
+                    "block house": "House",
+                    "coach house": "House",
+                    "bedsit": "Flat"
+                }
+
+                built_form_map = {
+                    "mid-terrace": "Mid-Terrace",
+                    "end-terrace": "End-Terrace",
+                    "semi-detached": "Semi-Detached",
+                    "detached": "Detached",
+                    "enclosed end-terrace": "Enclosed End-Terrace",
+                    "enclosed mid-terrace": "Enclosed Mid-Terrace",
+                }
+                # We remap the values to match the EPC expected formats
+                plan_input["property_type"] = plan_input["property_type"].map(property_type_map)
+                plan_input["built_form"] = plan_input["built_form"].map(built_form_map)
+
+                plan_input = plan_input.to_dict("records")
+            else:
+                raise ValueError("Other formats not yet supported")
+
+        else:
+            plan_input = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path
+            )
+
+        # We then slice it on the indexes if they are provided
+        if body.index_start is not None and body.index_end is not None:
+            plan_input = plan_input[body.index_start:body.index_end]
+
         # Check for duplicate UPRNS
         input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")]
 
@@ -455,6 +518,8 @@ async def model_engine(body: PlanTriggerRequest):
         for config in tqdm(plan_input):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
             uprn = config.get("uprn", None)
+            if pd.isnull(uprn):
+                uprn = None
             if uprn:
                 uprn = int(float(uprn))
 
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index 766de840..21794284 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -684,6 +684,7 @@ class RetrieveFindMyEpc:
             ],
             "Increase loft insulation to 250mm": ["loft_insulation"],
             "Solar photovoltaics panels, 25% of roof area": ["solar_pv"],
+            'Air or ground source heat pump': ["air_source_heat_pump"],
         }
 
         survey = True