fixed requests

2026-07-27 23:35:01 +00:00 · 2025-07-22 12:45:07 +01:00 · 2025-07-22 12:45:07 +01:00 · 4d4e43c048
commit 4d4e43c048
parent 5e717b73b2
3 changed files with 70 additions and 4 deletions
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -105,8 +105,8 @@ class PlanTriggerRequest(BaseModel):

    # Add in optional fields which describe the format of the asset list being used

-    file_type: Optional[Literal["csv", "xlsx"]] = None,
-    file_format: Optional[Literal["domna_asset_list"]] = None,
+    file_type: Optional[Literal["csv", "xlsx"]] = None
+    file_format: Optional[Literal["domna_asset_list"]] = None
    sheet_name: Optional[str] = None
    # If one of index_start or index_end is set, the other must be set too
    index_start: Optional[int] = None
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -4,6 +4,7 @@ from datetime import datetime

 from tqdm import tqdm
 import pandas as pd
+import numpy as np
 from etl.epc.Record import EPCRecord
 from backend.SearchEpc import SearchEpc
 from sqlalchemy.exc import IntegrityError, OperationalError
@ -37,7 +38,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
 from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
-from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
+from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
 from backend.ml_models.Valuation import PropertyValuation

 from etl.bill_savings.KwhData import KwhData
@ -435,7 +436,69 @@ async def model_engine(body: PlanTriggerRequest):
    try:
        session.begin()
        logger.info("Getting the inputs")
-        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+
+        if body.file_type == "xlsx":
+            plan_input = read_excel_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET,
+                file_key=body.trigger_file_path,
+                sheet_name=body.sheet_name,
+                header_row=0,
+            )
+
+            # We now handle the case where the input data is a Domna standardised asset list
+            if body.file_format == "domna_asset_list":
+                # We rename the columns to match the expected format
+                plan_input = plan_input.rename(
+                    columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
+                )
+                # Where the EPC has been estimated, that is because a UPRN wasn't available, so we remove the UPRN
+                plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"])
+                # We handle the landlord property type and built form
+                plan_input["property_type"] = plan_input["landlord_property_type"].copy()
+                plan_input["built_form"] = plan_input["landlord_built_form"].copy()
+                plan_input["property_type"] = np.where(
+                    plan_input["property_type"] == "unknown",
+                    plan_input["epc_property_type"],
+                    plan_input["property_type"]
+                )
+                plan_input["built_form"] = np.where(
+                    plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"]
+                )
+                property_type_map = {
+                    "house": "House",
+                    "flat": "Flat",
+                    "maisonette": "Maisonette",
+                    "bungalow": "Bungalow",
+                    "block house": "House",
+                    "coach house": "House",
+                    "bedsit": "Flat"
+                }
+
+                built_form_map = {
+                    "mid-terrace": "Mid-Terrace",
+                    "end-terrace": "End-Terrace",
+                    "semi-detached": "Semi-Detached",
+                    "detached": "Detached",
+                    "enclosed end-terrace": "Enclosed End-Terrace",
+                    "enclosed mid-terrace": "Enclosed Mid-Terrace",
+                }
+                # We remap the values to match the EPC expected formats
+                plan_input["property_type"] = plan_input["property_type"].map(property_type_map)
+                plan_input["built_form"] = plan_input["built_form"].map(built_form_map)
+
+                plan_input = plan_input.to_dict("records")
+            else:
+                raise ValueError("Other formats not yet supported")
+
+        else:
+            plan_input = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path
+            )
+
+        # We then slice it by the indexes if they are provided
+        if body.index_start is not None and body.index_end is not None:
+            plan_input = plan_input[body.index_start:body.index_end]
+
        # Check for duplicate UPRNS
        input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")]

@ -455,6 +518,8 @@ async def model_engine(body: PlanTriggerRequest):
        for config in tqdm(plan_input):
            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
            uprn = config.get("uprn", None)
+            if pd.isnull(uprn):
+                uprn = None
            if uprn:
                uprn = int(float(uprn))

--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@ -684,6 +684,7 @@ class RetrieveFindMyEpc:
            ],
            "Increase loft insulation to 250mm": ["loft_insulation"],
            "Solar photovoltaics panels, 25% of roof area": ["solar_pv"],
+            'Air or ground source heat pump': ["air_source_heat_pump"],
        }

        survey = True