From abc300b40615d4382f3b0ed076b883bb97928a84 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 10 Feb 2026 14:31:28 +0000 Subject: [PATCH] merged --- asset_list/app.py | 32 ++++++++++++++-------------- asset_list/mappings/property_type.py | 7 +++++- backend/engine/engine.py | 16 ++++++++------ backend/ml_models/api.py | 11 ++++++++++ backend/onboarders/base.py | 2 +- etl/bill_savings/KwhData.py | 4 ++++ recommendations/Recommendations.py | 1 + utils/s3.py | 1 + 8 files changed, 49 insertions(+), 25 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index b46254f9..30172121 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -69,24 +69,24 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney" - data_filename = "Domna SHF Wave 3 (3).xlsx" - sheet_name = "Domna Wave 3" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire" + data_filename = "ASPIRE ASSET LIST.xlsx" + sheet_name = "Asset List" postcode_column = "Postcode" - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1"] + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Construction Years" - landlord_os_uprn = "UPRN" - landlord_property_type = "Type" - landlord_built_form = "Attachment" - landlord_wall_construction = "Wall type" + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = None + landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "Row ID" + landlord_property_id = "LLUPRN" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -275,7 +275,7 @@ def app(): if skip is not None and not 
force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i : i + chunk_size] + chunk = asset_list.standardised_asset_list[i: i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -418,7 +418,7 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename(columns=asset_list.EPC_API_DATA_NAMES) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place @@ -435,7 +435,7 @@ def app(): find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), + ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID, ) diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 6f808c9a..177a7549 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -439,6 +439,11 @@ PROPERTY_MAPPING = { 'House: Semi Detached: Top Floor': 'house', 'House: End Terrace: Ground Floor': 'house', 'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette', - 'Flat: Enclosed Mid Terrace: Basement': 'flat' + 'Flat: Enclosed Mid Terrace: Basement': 'flat', + + 'Warden Bungalow': 'bungalow', + 'Warden Flat': 'flat', + 'Upper Floor Flat': 'flat', + 'Extracare Scheme': 'other' } diff --git a/backend/engine/engine.py b/backend/engine/engine.py index e833eb89..69726604 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) logger.info("Preparing data for scoring in sap change api") - recommendations_scoring_data = 
pd.DataFrame(recommendations_scoring_data).drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] - ) - # Temp putting this here + recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + if not recommendations_scoring_data.empty: + recommendations_scoring_data = recommendations_scoring_data.drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] + ) + # TODO: Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True all_predictions = await model_api.async_paginated_predictions( diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index daf4b715..440367b2 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -313,4 +313,15 @@ class ModelApi: logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}") await asyncio.sleep(2 ** attempts) # exponential backoff await self.close_aiohttp_session() + + # Ensure stable output structure for the dataframe to be utilised by other functions downstream + for k in all_predictions.keys(): + if all_predictions[k].empty: + col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if ( + extract_ids) else ['id', 'predictions'] + + all_predictions[k] = pd.DataFrame( + columns=col_template + ) + return all_predictions diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 93a0b7b0..03cb2370 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -53,7 +53,7 @@ class OnboarderBase: ) else: self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name) - + def write(self): if self.data is None: raise ValueError("No data to write. 
Please run transform() before writing.") diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py index 3291e909..b4bb979d 100644 --- a/etl/bill_savings/KwhData.py +++ b/etl/bill_savings/KwhData.py @@ -196,6 +196,10 @@ class KwhData: if save and self.bucket is None: raise Exception("bucket not set, cannot save data") + if data.empty: + # If we have no data + return data + # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features # in anticipation of the new model diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index c6fea3b6..e470c1a3 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -1090,6 +1090,7 @@ class Recommendations: ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY + # kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][ kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id) ].merge( diff --git a/utils/s3.py b/utils/s3.py index e70669d0..2e67d4f0 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -264,6 +264,7 @@ def save_excel_to_s3(df, bucket_name, file_key): def read_csv_from_s3(bucket_name, filepath): + logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'") s3 = boto3.client('s3') # Get the object from s3