fixed requests

This commit is contained in:
Khalim Conn-Kowlessar 2025-07-22 12:45:07 +01:00
parent 5e717b73b2
commit 4d4e43c048
3 changed files with 70 additions and 4 deletions

View file

@ -105,8 +105,8 @@ class PlanTriggerRequest(BaseModel):
# Add in optional fields which describe the format of the asset list being used
file_type: Optional[Literal["csv", "xlsx"]] = None,
file_format: Optional[Literal["domna_asset_list"]] = None,
file_type: Optional[Literal["csv", "xlsx"]] = None
file_format: Optional[Literal["domna_asset_list"]] = None
sheet_name: Optional[str] = None
# If one of index_start or index_end is set, the other must be set too
index_start: Optional[int] = None

View file

@ -4,6 +4,7 @@ from datetime import datetime
from tqdm import tqdm
import pandas as pd
import numpy as np
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from sqlalchemy.exc import IntegrityError, OperationalError
@ -37,7 +38,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.KwhData import KwhData
@ -435,7 +436,69 @@ async def model_engine(body: PlanTriggerRequest):
try:
session.begin()
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
if body.file_type == "xlsx":
plan_input = read_excel_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET,
file_key=body.trigger_file_path,
sheet_name=body.sheet_name,
header_row=0,
)
# We now handle the case where the input data is a Domna standardised assset list
if body.file_format == "domna_asset_list":
# We rename the columns to match the expected format
plan_input = plan_input.rename(
columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
)
# Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remote UPRN
plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"])
# We handle the landlord property type and built form
plan_input["property_type"] = plan_input["landlord_property_type"].copy()
plan_input["built_form"] = plan_input["landlord_built_form"].copy()
plan_input["property_type"] = np.where(
plan_input["property_type"] == "unknown",
plan_input["epc_property_type"],
plan_input["property_type"]
)
plan_input["built_form"] = np.where(
plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"]
)
property_type_map = {
"house": "House",
"flat": "Flat",
"maisonette": "Maisonette",
"bungalow": "Bungalow",
"block house": "House",
"coach house": "House",
"bedsit": "Flat"
}
built_form_map = {
"mid-terrace": "Mid-Terrace",
"end-terrace": "End-Terrace",
"semi-detached": "Semi-Detached",
"detached": "Detached",
"enclosed end-terrace": "Enclosed End-Terrace",
"enclosed mid-terrace": "Enclosed Mid-Terrace",
}
# We remap the values to match the EPC expected formats
plan_input["property_type"] = plan_input["property_type"].map(property_type_map)
plan_input["built_form"] = plan_input["built_form"].map(built_form_map)
plan_input = plan_input.to_dict("records")
else:
raise ValueError("Other formats not yet supported")
else:
plan_input = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path
)
# We then slide it on the indexes if they are provided
if body.index_start is not None and body.index_end is not None:
plan_input = plan_input[body.index_start:body.index_end]
# Check for duplicate UPRNS
input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")]
@ -455,6 +518,8 @@ async def model_engine(body: PlanTriggerRequest):
for config in tqdm(plan_input):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
uprn = config.get("uprn", None)
if pd.isnull(uprn):
uprn = None
if uprn:
uprn = int(float(uprn))

View file

@ -684,6 +684,7 @@ class RetrieveFindMyEpc:
],
"Increase loft insulation to 250mm": ["loft_insulation"],
"Solar photovoltaics panels, 25% of roof area": ["solar_pv"],
'Air or ground source heat pump': ["air_source_heat_pump"],
}
survey = True