From 723beaf1041168461220da87ebdee8477d9c3f8c Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 26 Aug 2025 10:44:31 +0100 Subject: [PATCH 001/202] fix pipeline for July 2025 data, keep lodgement date to do analysis on new rdsap standard in ML stage --- .gitignore | 4 +- BaseUtility.py | 21 +- etl/epc/DataProcessor.py | 8 +- etl/epc/Dataset.py | 39 +- recommendations/rdsap_tables.py | 864 +++++++++++++++++++++++++++----- 5 files changed, 775 insertions(+), 161 deletions(-) diff --git a/.gitignore b/.gitignore index 5e247d77..a6538116 100644 --- a/.gitignore +++ b/.gitignore @@ -275,4 +275,6 @@ cache/ */.idea *.png -*.pptx \ No newline at end of file +*.pptx + +local_data* \ No newline at end of file diff --git a/BaseUtility.py b/BaseUtility.py index e799144d..2f990695 100644 --- a/BaseUtility.py +++ b/BaseUtility.py @@ -30,24 +30,25 @@ class Definitions: # The Building Emission Rate (BER) data field for non-domestic buildings may contain a ‘blank’ value. The BER # was only lodged on the register from 7 March 2010. "Blank" - # There are currently just over 8,600 records where the local authority identifier is ‘null’. This is due to - # the Register Operator not being able to match the building address in the Markermap Ordinance Survey (GB) - # lookup tables or OS MasterMap Address Layer 2 data. The majority of these addresses have been requested - # manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds, - # etc). These records are being published for completeness. An ongoing process to manage these manually added + # There are currently just over 8,600 records where the local authority identifier is ‘null’. This is due to + # the Register Operator not being able to match the building address in the Markermap Ordinance Survey (GB) + # lookup tables or OS MasterMap Address Layer 2 data. 
The majority of these addresses have been requested + # manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds, + # etc). These records are being published for completeness. An ongoing process to manage these manually added # addresses will take time to develop to deal with these and future anomalies. # - # There are several fields within the lodged data where it is possible to enter multiple entries to cater for - # different data_types of build within a single property, i.e. extensions. This results in multiple entries for - # the description fields for floor, roof and wall. For the purposes of this data release only the information - # contained within the first of these multiple entries is being provided. As there are no restrictions on the - # value in this first field it means that sometimes the first field in a multiple entry description field may + # There are several fields within the lodged data where it is possible to enter multiple entries to cater for + # different data_types of build within a single property, i.e. extensions. This results in multiple entries for + # the description fields for floor, roof and wall. For the purposes of this data release only the information + # contained within the first of these multiple entries is being provided. As there are no restrictions on the + # value in this first field it means that sometimes the first field in a multiple entry description field may # contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases. "NULL", # We sometimes see fields populated with just an empty string. "", # An older value which rarely shows up but has been seen in the data. 
"UNKNOWN", + "Unknown", } DATA_ANOMALY_SUBSTRINGS = { diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 9655cf77..99987f48 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -48,6 +48,8 @@ construction_age_bounds_map = { "England and Wales: 2003-2006": {"l": 2003, "u": 2006}, "England and Wales: 2007-2011": {"l": 2007, "u": 2011}, "England and Wales: 2012 onwards": {"l": 2012, "u": 3000}, + "England and Wales: 2012-2021": {"l": 2012, "u": 2021}, + "England and Wales: 2022 onwards": {"l": 2022, "u": 3000}, } construction_age_remap = { @@ -384,7 +386,7 @@ class EPCDataProcessor: has_missings = pd.isnull(self.data[col]).sum() while has_missings: self.data = apply_clean( - data=self.data, matching_columns=matching_columns[0: to_index + 1] + data=self.data, matching_columns=matching_columns[0 : to_index + 1] ) has_missings = pd.isnull(self.data[col]).sum() @@ -858,7 +860,9 @@ class EPCDataProcessor: # Fill NaN values with averages for col in cols_to_clean: - data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"]) + data_to_clean[col] = data_to_clean[col].fillna( + data_to_clean[f"{col}_AVERAGE"] + ) data_to_clean = data_to_clean.drop(columns=[f"{col}_AVERAGE"]) # If we still have missings data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[col].mean()) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 5d3720fc..35bc108e 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -8,7 +8,9 @@ from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes -from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes +from etl.epc_clean.epc_attributes.MainheatControlAttributes import ( + MainheatControlAttributes, +) from 
etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes @@ -169,7 +171,7 @@ class TrainingDataset(BaseDataset): self.df = pd.DataFrame([dataset.difference_record for dataset in datasets]) self._feature_generation() - self._drop_features() + # self._drop_features() self._clean_efficiency_variables() self._null_validation(information="Clean Efficiency Variables") self._expand_description_to_features(cleaned_lookup) @@ -210,11 +212,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ @@ -519,7 +521,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"] ) - ] + ] elif component == "floor": expanded_df = expanded_df[ (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) @@ -536,7 +538,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"] ) - ] + ] elif component == "roof": expanded_df = expanded_df[ (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) @@ -549,7 +551,7 @@ class TrainingDataset(BaseDataset): expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"] ) - ] + ] return expanded_df @@ -695,10 +697,14 @@ class TrainingDataset(BaseDataset): cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key]) # We handle a specific edge case where we're missing information for the original description - descriptions = [x for x in self.df[left_on_starting].unique() if pd.notnull(x)] + descriptions = [ + x for x in self.df[left_on_starting].unique() if 
pd.notnull(x) + ] # take any not in the cleaned lookup missing_descriptions = [ - x for x in descriptions if x not in cleaned_lookup_df_for_key["original_description"].values + x + for x in descriptions + if x not in cleaned_lookup_df_for_key["original_description"].values ] if missing_descriptions: # We handle them here @@ -710,9 +716,12 @@ class TrainingDataset(BaseDataset): cleaned_data.append( { "original_description": x, - "clean_description": desc_cleaner.description.replace("(assumed)", - "").rstrip().capitalize(), - **cleaned + "clean_description": desc_cleaner.description.replace( + "(assumed)", "" + ) + .rstrip() + .capitalize(), + **cleaned, } ) cleaned_lookup_df_for_key = pd.concat( diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py index e56faf7c..14c7f247 100644 --- a/recommendations/rdsap_tables.py +++ b/recommendations/rdsap_tables.py @@ -3,6 +3,7 @@ This script contains standard tables which are defined in rdsap. The most recent based on the 2012 version, however the government is currently working on releasing a new version, and there we will need to re-visit this """ + import pandas as pd age_band_data = [ @@ -11,84 +12,91 @@ age_band_data = [ "England_Wales": "before 1900", "Scotland": "before 1919", "Northern_Ireland": "before 1919", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "B", "England_Wales": "1900-1929", "Scotland": "1919-1929", "Northern_Ireland": "1919-1929", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "C", "England_Wales": "1930-1949", "Scotland": "1930-1949", "Northern_Ireland": "1930-1949", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "D", "England_Wales": "1950-1966", "Scotland": "1950-1964", "Northern_Ireland": "1950-1973", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "E", "England_Wales": "1967-1975", "Scotland": "1965-1975", "Northern_Ireland": "1974-1977", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "F", 
"England_Wales": "1976-1982", "Scotland": "1976-1983", "Northern_Ireland": "1978-1985", - "Park_home_UK": "before 1983" + "Park_home_UK": "before 1983", }, { "age_band": "G", "England_Wales": "1983-1990", "Scotland": "1984-1991", "Northern_Ireland": "1986-1991", - "Park_home_UK": "1983-1995" + "Park_home_UK": "1983-1995", }, { "age_band": "H", "England_Wales": "1991-1995", "Scotland": "1992-1998", "Northern_Ireland": "1992-1999", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "I", "England_Wales": "1996-2002", "Scotland": "1999-2002", "Northern_Ireland": "2000-2006", - "Park_home_UK": "1996-2005" + "Park_home_UK": "1996-2005", }, { "age_band": "J", "England_Wales": "2003-2006", "Scotland": "2003-2007", "Northern_Ireland": None, - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "K", "England_Wales": "2007-2011", "Scotland": "2008-2011", "Northern_Ireland": "2007-2013", - "Park_home_UK": "2006 onwards" + "Park_home_UK": "2006 onwards", }, { "age_band": "L", "England_Wales": "2012 onwards", "Scotland": "2012 onwards", "Northern_Ireland": "2014 onwards", - "Park_home_UK": None + "Park_home_UK": None, + }, + { + "age_band": "L", + "England_Wales": "2012-2021", + "Scotland": "2012-2023", + "Northern_Ireland": "2014-2022", + "Park_home_UK": None, }, ] @@ -102,32 +110,109 @@ england_wales_age_band_lookup = { ######################################################################################################################## default_wall_thickness = [ { - "type": "stone", "A": 500, "B": 500, "C": 500, "D": 500, "E": 450, "F": 420, "G": 420, "H": 420, - "I": 450, "J": 450, "K": 450, "L": 450 + "type": "stone", + "A": 500, + "B": 500, + "C": 500, + "D": 500, + "E": 450, + "F": 420, + "G": 420, + "H": 420, + "I": 450, + "J": 450, + "K": 450, + "L": 450, }, { - "type": "solid brick", "A": 220, "B": 220, "C": 220, "D": 220, "E": 240, "F": 250, "G": 270, "H": 270, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "solid brick", + "A": 220, + 
"B": 220, + "C": 220, + "D": 220, + "E": 240, + "F": 250, + "G": 270, + "H": 270, + "I": 300, + "J": 300, + "K": 300, + "L": 300, }, { - "type": "cavity", "A": 250, "B": 250, "C": 250, "D": 250, "E": 250, "F": 260, "G": 270, "H": 270, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "cavity", + "A": 250, + "B": 250, + "C": 250, + "D": 250, + "E": 250, + "F": 260, + "G": 270, + "H": 270, + "I": 300, + "J": 300, + "K": 300, + "L": 300, }, { - "type": "timber frame", "A": 150, "B": 150, "C": 150, "D": 250, "E": 270, "F": 270, "G": 270, "H": 270, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "timber frame", + "A": 150, + "B": 150, + "C": 150, + "D": 250, + "E": 270, + "F": 270, + "G": 270, + "H": 270, + "I": 300, + "J": 300, + "K": 300, + "L": 300, }, { - "type": "cob", "A": 540, "B": 540, "C": 540, "D": 540, "E": 540, "F": 540, "G": 560, "H": 560, "I": 590, - "J": 590, "K": 590, "L": 590 + "type": "cob", + "A": 540, + "B": 540, + "C": 540, + "D": 540, + "E": 540, + "F": 540, + "G": 560, + "H": 560, + "I": 590, + "J": 590, + "K": 590, + "L": 590, }, { - "type": "system build", "A": 250, "B": 250, "C": 250, "D": 250, "E": 250, "F": 300, "G": 300, "H": 300, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "system build", + "A": 250, + "B": 250, + "C": 250, + "D": 250, + "E": 250, + "F": 300, + "G": 300, + "H": 300, + "I": 300, + "J": 300, + "K": 300, + "L": 300, }, { - "type": "park home", "A": None, "B": None, "C": None, "D": None, "E": None, "F": 50, "G": 50, - "H": None, "I": 75, "J": 100, "K": 100, "L": 100 + "type": "park home", + "A": None, + "B": None, + "C": None, + "D": None, + "E": None, + "F": 50, + "G": 50, + "H": None, + "I": 75, + "J": 100, + "K": 100, + "L": 100, }, ] @@ -170,33 +255,384 @@ wall_types = [ u_values = [ ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["1.7", "1.7", "1.7", "1.7", "1.7", "1.0", "0.60", "0.60", 
"0.45", "0.35", "0.30", "0.28"], - ["0.55", "0.55", "0.55", "0.55", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.32", "0.32", "0.32", "0.32", "0.32", "0.28", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], - ["0.80", "0.80", "0.80", "0.80", "0.80", "0.80", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.26", "0.26", "0.26", "0.26", "0.26", "0.26", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.20", "0.20", "0.20", "0.20", "0.20", "0.20", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.16", "0.16", "0.16", "0.16", "0.16", "0.16", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], - ["1.5", "1.5", "1.5", "1.5", "1.5", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.53", "0.53", "0.53", "0.53", "0.53", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.32", "0.32", "0.32", "0.32", "0.32", "0.30", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], - ["0.7", "0.7", "0.7", "0.7", "0.7", "0.40", "0.35", "0.35", "0.45", "0.35", "0.30", "0.28"], - ["0.37", "0.37", "0.37", "0.37", "0.37", "0.27", "0.25", "0.25", "0.25", "0.25", "0.21", "0.21"], - ["0.25", "0.25", "0.25", "0.25", "0.25", "0.20", "0.19", "0.19", "0.19", "0.19", "0.17", "0.16"], - ["0.19", "0.19", "0.19", "0.19", "0.19", "0.16", "0.15", "0.15", "0.15", "0.15", "0.14", "0.14"], - ["0.16", "0.16", "0.16", "0.16", "0.16", "0.13", "0.13", "0.13", "0.13", "0.13", "0.12", "0.12"], - ["2.5", "1.9", "1.9", "1.0", "0.80", "0.45", "0.40", "0.40", "0.40", 
"0.35", "0.30", "0.28"], - ["0.60", "0.55", "0.55", "0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"], - ["2.0", "2.0", "2.0", "2.0", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.60", "0.60", "0.60", "0.60", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.35", "0.35", "0.35", "0.35", "0.35", "0.32", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.25", "0.25", "0.25", "0.25", "0.25", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], + [ + "1.7", + "1.7", + "1.7", + "1.7", + "1.7", + "1.0", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + ], + [ + "0.55", + "0.55", + "0.55", + "0.55", + "0.55", + "0.45", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + ], + [ + "0.32", + "0.32", + "0.32", + "0.32", + "0.32", + "0.28", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + ], + [ + "0.23", + "0.23", + "0.23", + "0.23", + "0.23", + "0.21", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + ], + [ + "0.18", + "0.18", + "0.18", + "0.18", + "0.18", + "0.17", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + ], + [ + "0.80", + "0.80", + "0.80", + "0.80", + "0.80", + "0.80", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + ], + [ + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + ], + [ + "0.26", + "0.26", + "0.26", + "0.26", + "0.26", + "0.26", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + ], + [ + "0.20", + "0.20", + "0.20", + "0.20", + "0.20", + "0.20", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + ], + [ + "0.16", + "0.16", + "0.16", + "0.16", + "0.16", + "0.16", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + ], + [ + "1.5", + "1.5", + "1.5", + "1.5", + "1.5", + "1.0", + "0.60", + "0.60", + "0.45", + 
"0.35", + "0.30", + "0.28", + ], + [ + "0.53", + "0.53", + "0.53", + "0.53", + "0.53", + "0.45", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + ], + [ + "0.32", + "0.32", + "0.32", + "0.32", + "0.32", + "0.30", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + ], + [ + "0.23", + "0.23", + "0.23", + "0.23", + "0.23", + "0.21", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + ], + [ + "0.18", + "0.18", + "0.18", + "0.18", + "0.18", + "0.17", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + ], + [ + "0.7", + "0.7", + "0.7", + "0.7", + "0.7", + "0.40", + "0.35", + "0.35", + "0.45", + "0.35", + "0.30", + "0.28", + ], + [ + "0.37", + "0.37", + "0.37", + "0.37", + "0.37", + "0.27", + "0.25", + "0.25", + "0.25", + "0.25", + "0.21", + "0.21", + ], + [ + "0.25", + "0.25", + "0.25", + "0.25", + "0.25", + "0.20", + "0.19", + "0.19", + "0.19", + "0.19", + "0.17", + "0.16", + ], + [ + "0.19", + "0.19", + "0.19", + "0.19", + "0.19", + "0.16", + "0.15", + "0.15", + "0.15", + "0.15", + "0.14", + "0.14", + ], + [ + "0.16", + "0.16", + "0.16", + "0.16", + "0.16", + "0.13", + "0.13", + "0.13", + "0.13", + "0.13", + "0.12", + "0.12", + ], + [ + "2.5", + "1.9", + "1.9", + "1.0", + "0.80", + "0.45", + "0.40", + "0.40", + "0.40", + "0.35", + "0.30", + "0.28", + ], + [ + "0.60", + "0.55", + "0.55", + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.35", + "0.30", + "0.28", + ], + [ + "2.0", + "2.0", + "2.0", + "2.0", + "1.7", + "1.0", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + ], + [ + "0.60", + "0.60", + "0.60", + "0.60", + "0.55", + "0.45", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + ], + [ + "0.35", + "0.35", + "0.35", + "0.35", + "0.35", + "0.32", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + ], + [ + "0.25", + "0.25", + "0.25", + "0.25", + "0.25", + "0.21", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + ], + [ + "0.18", + "0.18", + "0.18", + "0.18", + 
"0.18", + "0.17", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + ], ] age_bands = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"] @@ -210,8 +646,13 @@ for i, wall_type in enumerate(wall_types): parkhome_wall_uvalues = [ {"Wall_type": "Park home as built", "F": "1.7", "G": "1.2", "I": "0.7", "K": "0.6"}, - {"Wall_type": "Park home with additional insulation", "F": "s1.1.2", "G": "s1.1.2", "I": "s1.1.2", - "K": "s1.1.2"} + { + "Wall_type": "Park home with additional insulation", + "F": "s1.1.2", + "G": "s1.1.2", + "I": "s1.1.2", + "K": "s1.1.2", + }, ] wall_uvalues.extend(parkhome_wall_uvalues) @@ -229,16 +670,12 @@ epc_wall_description_map = { "Cavity wall, as built, insulated": "Filled cavity", "Cavity wall, with external insulation": "Unfilled cavity with 100 mm external or internal insulation", "Cavity wall, insulated": "Filled cavity", - 'Cavity wall, partial insulation': "Filled cavity", - + "Cavity wall, partial insulation": "Filled cavity", "Cavity wall,": "Cavity as built", # General case of cavity wall without further details - "Cavity wall, filled cavity and external insulation": - "Filled cavity with 100 mm external or internal insulation", - "Cavity wall, filled cavity and internal insulation": - "Filled cavity with 100 mm external or internal insulation", + "Cavity wall, filled cavity and external insulation": "Filled cavity with 100 mm external or internal insulation", + "Cavity wall, filled cavity and internal insulation": "Filled cavity with 100 mm external or internal insulation", "Cavity wall, with internal insulation": "Unfilled cavity with 100 mm external or internal insulation", "Cavity wall, no insulation": "Cavity as built", - ############################ # Solid brick wall mappings ############################ @@ -247,7 +684,6 @@ epc_wall_description_map = { "Solid brick, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Solid brick, with external insulation": "Stone/solid 
brick with 100 mm external or internal insulation", "Solid brick, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", - ############################ # Timber frame wall mappings ############################ @@ -262,33 +698,29 @@ epc_wall_description_map = { # Sandstone/limestones wall mappings ############################ "Sandstone or limestone, as built, no insulation": "Stone: sandstone or limestone as built", - "Sandstone or limestone, with internal insulation": - "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone or limestone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone or limestone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " - "insulation", + "insulation", "Sandstone, as built, no insulation": "Stone: sandstone or limestone as built", - "Sandstone or limestone, as built, insulated": - "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone or limestone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone or limestone, with external insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", + "insulation", "Sandstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", - ############################ # Granite/whinstone wall mappings ############################ "Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built", "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal " - 
"insulation", + "insulation", "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " - "insulation", + "insulation", "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal " - "insulation", + "insulation", "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", - + "insulation", ############################ # System built wall mappings ############################ @@ -297,15 +729,13 @@ epc_wall_description_map = { "System built, with internal insulation": "System build with 100 mm external or internal insulation", "System built, with external insulation": "System build with 100 mm external or internal insulation", "System built, as built, insulated": "System build with 100 mm external or internal insulation", - ############################ # Cob wall mappings ############################ "Cob, as built": "Cob as built", "Cob, with external insulation": "Cob with 100 mm external or internal insulation", "Cob, with internal insulation": "Cob with 100 mm external or internal insulation", - 'Cob,': "Cob as built", - + "Cob,": "Cob as built", ############################ # Park home mappings ############################ @@ -321,20 +751,71 @@ epc_wall_description_map = { ######################################################################################################################## s9_list = [ - {"Insulation_thickness_mm": None, "Slates_or_tiles_U_value_W_m2K": 2.3, "Thatched_roof_U_value_W_m2K": 0.35}, - {"Insulation_thickness_mm": 12, "Slates_or_tiles_U_value_W_m2K": 1.5, "Thatched_roof_U_value_W_m2K": 0.32}, - {"Insulation_thickness_mm": 25, "Slates_or_tiles_U_value_W_m2K": 1.0, "Thatched_roof_U_value_W_m2K": 0.30}, - {"Insulation_thickness_mm": 50, "Slates_or_tiles_U_value_W_m2K": 0.68, "Thatched_roof_U_value_W_m2K": 0.25}, - {"Insulation_thickness_mm": 75, "Slates_or_tiles_U_value_W_m2K": 0.50, 
"Thatched_roof_U_value_W_m2K": 0.22}, - {"Insulation_thickness_mm": 100, "Slates_or_tiles_U_value_W_m2K": 0.40, "Thatched_roof_U_value_W_m2K": 0.20}, - {"Insulation_thickness_mm": 150, "Slates_or_tiles_U_value_W_m2K": 0.30, "Thatched_roof_U_value_W_m2K": 0.17}, - {"Insulation_thickness_mm": 200, "Slates_or_tiles_U_value_W_m2K": 0.21, "Thatched_roof_U_value_W_m2K": 0.14}, - {"Insulation_thickness_mm": 250, "Slates_or_tiles_U_value_W_m2K": 0.17, "Thatched_roof_U_value_W_m2K": 0.12}, - {"Insulation_thickness_mm": 270, "Slates_or_tiles_U_value_W_m2K": 0.16, "Thatched_roof_U_value_W_m2K": 0.12}, - {"Insulation_thickness_mm": 300, "Slates_or_tiles_U_value_W_m2K": 0.14, "Thatched_roof_U_value_W_m2K": 0.11}, - {"Insulation_thickness_mm": 350, "Slates_or_tiles_U_value_W_m2K": 0.12, "Thatched_roof_U_value_W_m2K": 0.10}, - {"Insulation_thickness_mm": 400, "Slates_or_tiles_U_value_W_m2K": 0.11, - "Thatched_roof_U_value_W_m2K": 0.09}, + { + "Insulation_thickness_mm": None, + "Slates_or_tiles_U_value_W_m2K": 2.3, + "Thatched_roof_U_value_W_m2K": 0.35, + }, + { + "Insulation_thickness_mm": 12, + "Slates_or_tiles_U_value_W_m2K": 1.5, + "Thatched_roof_U_value_W_m2K": 0.32, + }, + { + "Insulation_thickness_mm": 25, + "Slates_or_tiles_U_value_W_m2K": 1.0, + "Thatched_roof_U_value_W_m2K": 0.30, + }, + { + "Insulation_thickness_mm": 50, + "Slates_or_tiles_U_value_W_m2K": 0.68, + "Thatched_roof_U_value_W_m2K": 0.25, + }, + { + "Insulation_thickness_mm": 75, + "Slates_or_tiles_U_value_W_m2K": 0.50, + "Thatched_roof_U_value_W_m2K": 0.22, + }, + { + "Insulation_thickness_mm": 100, + "Slates_or_tiles_U_value_W_m2K": 0.40, + "Thatched_roof_U_value_W_m2K": 0.20, + }, + { + "Insulation_thickness_mm": 150, + "Slates_or_tiles_U_value_W_m2K": 0.30, + "Thatched_roof_U_value_W_m2K": 0.17, + }, + { + "Insulation_thickness_mm": 200, + "Slates_or_tiles_U_value_W_m2K": 0.21, + "Thatched_roof_U_value_W_m2K": 0.14, + }, + { + "Insulation_thickness_mm": 250, + "Slates_or_tiles_U_value_W_m2K": 0.17, + 
"Thatched_roof_U_value_W_m2K": 0.12, + }, + { + "Insulation_thickness_mm": 270, + "Slates_or_tiles_U_value_W_m2K": 0.16, + "Thatched_roof_U_value_W_m2K": 0.12, + }, + { + "Insulation_thickness_mm": 300, + "Slates_or_tiles_U_value_W_m2K": 0.14, + "Thatched_roof_U_value_W_m2K": 0.11, + }, + { + "Insulation_thickness_mm": 350, + "Slates_or_tiles_U_value_W_m2K": 0.12, + "Thatched_roof_U_value_W_m2K": 0.10, + }, + { + "Insulation_thickness_mm": 400, + "Slates_or_tiles_U_value_W_m2K": 0.11, + "Thatched_roof_U_value_W_m2K": 0.09, + }, ] s10_list = [ @@ -347,7 +828,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 2.3, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "E", @@ -358,7 +839,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 1.5, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "F", @@ -369,7 +850,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.80, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 1.7 + "Park_home": 1.7, }, { "Age_band": "G", @@ -380,7 +861,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": "0.50", "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 0.6 + "Park_home": 0.6, }, { "Age_band": "H", @@ -391,7 +872,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.35, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "I", @@ -402,7 +883,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.35, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 0.35 + "Park_home": 0.35, }, { "Age_band": "J", @@ -413,7 +894,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.30, "Thatched_roof": 0.30, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "K", @@ -424,7 +905,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.25, "Thatched_roof": 0.25, 
"Thatched_roof_room_in_roof": 0.25, - "Park_home": 0.30 + "Park_home": 0.30, }, { "Age_band": "L", @@ -435,8 +916,8 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.18, "Thatched_roof": 0.18, "Thatched_roof_room_in_roof": 0.18, - "Park_home": None - } + "Park_home": None, + }, ] table_s9 = pd.DataFrame(s9_list) @@ -452,22 +933,70 @@ table_s10 = pd.DataFrame(s10_list) ######################################################################################################################## s11_list = [ - {"Age_band": "A, B", "Floor_construction": "suspended timber", "England_Wales": 0, "Scotland": 0, - "Northern_Ireland": 0, "Park_home": 0}, - {"Age_band": "C to F", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 0, - "Northern_Ireland": 0, "Park_home": 0}, - {"Age_band": "G", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 0, - "Northern_Ireland": 0, "Park_home": 25}, - {"Age_band": "H", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 25, - "Northern_Ireland": 25, "Park_home": 0}, - {"Age_band": "I", "Floor_construction": "solid", "England_Wales": 25, "Scotland": 50, - "Northern_Ireland": 50, "Park_home": 50}, - {"Age_band": "J", "Floor_construction": "solid", "England_Wales": 75, "Scotland": 75, - "Northern_Ireland": 0, "Park_home": 0}, - {"Age_band": "K", "Floor_construction": "solid", "England_Wales": 100, "Scotland": 100, - "Northern_Ireland": 100, "Park_home": 70}, - {"Age_band": "L", "Floor_construction": "solid", "England_Wales": 100, "Scotland": 120, - "Northern_Ireland": 100, "Park_home": 0}, + { + "Age_band": "A, B", + "Floor_construction": "suspended timber", + "England_Wales": 0, + "Scotland": 0, + "Northern_Ireland": 0, + "Park_home": 0, + }, + { + "Age_band": "C to F", + "Floor_construction": "solid", + "England_Wales": 0, + "Scotland": 0, + "Northern_Ireland": 0, + "Park_home": 0, + }, + { + "Age_band": "G", + "Floor_construction": "solid", + "England_Wales": 0, + "Scotland": 0, + "Northern_Ireland": 0, + 
"Park_home": 25, + }, + { + "Age_band": "H", + "Floor_construction": "solid", + "England_Wales": 0, + "Scotland": 25, + "Northern_Ireland": 25, + "Park_home": 0, + }, + { + "Age_band": "I", + "Floor_construction": "solid", + "England_Wales": 25, + "Scotland": 50, + "Northern_Ireland": 50, + "Park_home": 50, + }, + { + "Age_band": "J", + "Floor_construction": "solid", + "England_Wales": 75, + "Scotland": 75, + "Northern_Ireland": 0, + "Park_home": 0, + }, + { + "Age_band": "K", + "Floor_construction": "solid", + "England_Wales": 100, + "Scotland": 100, + "Northern_Ireland": 100, + "Park_home": 70, + }, + { + "Age_band": "L", + "Floor_construction": "solid", + "England_Wales": 100, + "Scotland": 120, + "Northern_Ireland": 100, + "Park_home": 0, + }, ] table_s11 = pd.DataFrame(s11_list) @@ -481,21 +1010,90 @@ table_s11 = pd.DataFrame(s11_list) ######################################################################################################################## s12_list = [ - {"age_band": "A", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "B", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "C", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "D", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "E", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "F", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "G", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - - {"age_band": "H", "insulation_0": 0.51, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "I", "insulation_0": 0.51, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - - 
{"age_band": "J", "insulation_0": 0.25, "insulation_50": 0.25, "insulation_100": 0.25, "insulation_150": 0.22}, - - {"age_band": "K", "insulation_0": 0.22, "insulation_50": 0.22, "insulation_100": 0.22, "insulation_150": 0.22}, - {"age_band": "L", "insulation_0": 0.22, "insulation_50": 0.22, "insulation_100": 0.22, "insulation_150": 0.22}, + { + "age_band": "A", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "B", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "C", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "D", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "E", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "F", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "G", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "H", + "insulation_0": 0.51, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "I", + "insulation_0": 0.51, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "J", + "insulation_0": 0.25, + "insulation_50": 0.25, + "insulation_100": 0.25, + "insulation_150": 0.22, + }, + { + "age_band": "K", + "insulation_0": 0.22, + "insulation_50": 0.22, + "insulation_100": 0.22, + "insulation_150": 0.22, + }, + { + "age_band": "L", + "insulation_0": 0.22, + "insulation_50": 0.22, + "insulation_100": 0.22, + "insulation_150": 0.22, + }, ] table_s12 = pd.DataFrame(s12_list) From 335164eaf1b558e45a3cf377ed73f756716e05e6 Mon Sep 17 00:00:00 2001 From: Khalim 
Conn-Kowlessar Date: Thu, 18 Sep 2025 16:23:18 +0100 Subject: [PATCH 002/202] multiple remote assessments --- asset_list/AssetList.py | 21 ++++-- asset_list/app.py | 88 ++++++++++++++++++++++++++ asset_list/mappings/built_form.py | 4 +- asset_list/mappings/heating_systems.py | 7 +- asset_list/mappings/roof.py | 30 +++++++++ asset_list/mappings/walls.py | 12 +++- backend/apis/GoogleSolarApi.py | 1 - 7 files changed, 154 insertions(+), 9 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 9569afe8..dce929ae 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1783,9 +1783,16 @@ class AssetList: ) ) - not_a_flat = ( - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" - ) + # Determine if the client gave us property type in the first place + if all(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "unknown"): + # Use EPC + not_a_flat = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] != "Flat" + ) + else: + not_a_flat = ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" + ) solar_roof_meets_criteria = ( self.standardised_asset_list["solar_epc_roof_insulated"] | @@ -3452,7 +3459,13 @@ class AssetList: raise ValueError("No installer column found in master data") measure_mix_col = "MEASURE COMBO" - town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' + + if "TOWN" in master_data.columns: + town_colname = "TOWN" + elif 'Town/Area' in master_data.columns: + town_colname = 'Town/Area' + else: + town_colname = "Town/City" logger.info("Matching master data to asset list") matched = [] diff --git a/asset_list/app.py b/asset_list/app.py index 01c31f0f..833050fb 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,74 @@ def app(): Property UPRN """ + # CDS - Sept 2025 + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" + data_filename = "Founder Estates CDS.xlsx" + sheet_name 
= "Combined List" + postcode_column = 'Postcode' + address1_column = None # Is only patchily populated so we create it + address1_method = 'house_number_extraction' + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = "Heating Type" + landlord_existing_pv = None + landlord_property_id = "(Do Not Modify) Property" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Project from Nick + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/Sep2025 Project" + data_filename = "AL Test.xlsx" + sheet_name = "Sheet1" + postcode_column = 'postcode' + address1_column = None + address1_method = 'house_number_extraction' + fulladdress_column = "address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # Lambeth 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" data_filename = "LAMBETH Asset List ( Incomplete).xlsx" @@ -1307,6 +1375,26 @@ def app(): filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + # Determine inspections priority + # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ + # "domna_postcode"].unique() + # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( + # solar_jobs + # ) + # # Same for cav + # cavity_jobs = asset_list.standardised_asset_list[ + # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) + # ]["domna_postcode"].unique() + # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( + # cavity_jobs + # ) + # # We prioritise properties that are in solar areas and cavity areas + # import numpy as np + # asset_list.standardised_asset_list["inspection_priority"] = np.where( + # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], + # 1, 2 + # ) + with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) if asset_list.block_analysis_df is not None: diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 0dc51129..bdd82883 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -438,6 +438,6 @@ BUILT_FORM_MAPPINGS = { 'Maisonette - Mid Terrace': 'mid-terrace', 'Chalet - Wheelchair': 'unknown', 'Studio Flat': 'unknown', - 'Bungalow - Attached': 'semi-detached' - + 'Bungalow - Attached': 'semi-detached', + 'ND': 'unknown' } diff --git a/asset_list/mappings/heating_systems.py 
b/asset_list/mappings/heating_systems.py index 424b9b46..4ab8ca72 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -473,5 +473,10 @@ HEATING_MAPPINGS = { 'Boiler and radiators, oil': 'oil boiler', 'Boiler and radiators, electric': 'electric boiler', 'No system present: electric heaters assumed': 'electric radiators', - 'Boiler and radiators, anthracite': 'solid fuel' + 'Boiler and radiators, anthracite': 'solid fuel', + + 'Heat networks Heat networks (mains gas)': 'communal heating', + 'ND Oil': 'oil fuel', + 'Boiler Biofuel': 'boiler - other fuel' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 60f0473c..8ac926c0 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -246,4 +246,34 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched, 150 mm loft insulation': 'pitched insulated', 'Flat, limited insulation (assumed)': 'flat uninsulated', + 'Pitched (no access to loft) 350mm': 'pitched insulated', + 'Pitched (no access to loft) 200mm': 'pitched insulated', + 'Pitched (access to loft) 200mm': 'pitched insulated', + 'Pitched (no access to loft) 250mm': 'pitched insulated', + 'Pitched (access to loft) 100mm': 'pitched insulated', + 'Another dwelling above ND (inferred)': 'another dwelling above', + 'Pitched (no access to loft) N/A': 'pitched no access to loft', + 'Pitched (no access to loft) ND (inferred)': 'pitched no access to loft', + 'Pitched (no access to loft) 150mm': 'pitched insulated', + 'Pitched (access to loft) 400mm+': 'pitched insulated', + 'Pitched (no access to loft) 300mm': 'pitched insulated', + 'Pitched (access to loft) <25mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) None': 'pitched less than 100mm insulation', + 'Pitched (access to loft) 300mm': 'pitched insulated', + 'Pitched (access to loft) 50mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) 270mm': 'pitched insulated', + 'Pitched (access to loft) 
Non-joist': 'pitched access to loft', + 'Pitched (access to loft) 250mm': 'pitched insulated', + 'Another dwelling above N/A': 'another dwelling above', + 'Pitched (access to loft) 150mm': 'pitched insulated', + 'Pitched (access to loft) ND (inferred)': 'pitched access to loft', + 'Pitched (access to loft) 350mm': 'pitched insulated', + 'Pitched (access to loft) NR': 'pitched unknown insulation', + 'Pitched (access to loft) 75mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) N/A': 'pitched access to loft', + 'ND (inferred) 250mm': 'unknown insulated', + 'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation', + 'ND (inferred) ND (inferred)': 'unknown', + 'Flat Non-joist': 'flat insulated', + 'Same dwelling above N/A': 'another dwelling above' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 14e4565c..73db586e 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -342,5 +342,15 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid brick, as built, partial insulation (assumed)': 'insulated solid brick', 'Sandstone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', 'System built, as built, partial insulation (assumed)': 'system built unknown insulation', - 'Timber frame, with external insulation': 'insulated timber frame' + 'Timber frame, with external insulation': 'insulated timber frame', + + 'Cob As-built': 'cob', + 'System built Unknown insulation': 'system built unknown insulation', + 'Solid brick Unknown insulation': 'solid brick unknown insulation', + 'Timber frame Internal': 'insulated timber frame', + 'System built External': 'insulated system built', + 'Stone As-built': 'uninsulated sandstone or limestone', + 'System built As-built': "uninsulated system built", + 'System built Internal': 'insulated system built', + } diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 043f41a9..532afec0 100644 --- 
a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -332,7 +332,6 @@ class GoogleSolarApi: ) if solar_product is None: - logger.info("No suitable solar product found for the configuration with %d panels.", total_panels) continue total_cost = Costs.solar_pv( From d3f941349aa08bbe46f1f28f7e2440dc3894fe24 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 10:50:05 +0100 Subject: [PATCH 003/202] implementing decent homes wf --- backend/engine/engine.py | 2 +- etl/bill_savings/KwhData.py | 2 +- .../waltham_forest/decent_homes_pilot.py | 442 ++++++++++++++++++ 3 files changed, 444 insertions(+), 2 deletions(-) create mode 100644 etl/customers/waltham_forest/decent_homes_pilot.py diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 2e1ede79..cc17222f 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -900,7 +900,7 @@ async def model_engine(body: PlanTriggerRequest): r["uplift_project_score"] ) = funding.get_innovation_uplift( measure=r, - starting_sap=p.data["current-energy-efficiency"], + starting_sap=int(p.data["current-energy-efficiency"]), floor_area=p.floor_area, is_cavity=p.walls["is_cavity_wall"], current_wall_uvalue=current_wall_u_value, diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py index 24ce9f2c..3291e909 100644 --- a/etl/bill_savings/KwhData.py +++ b/etl/bill_savings/KwhData.py @@ -310,7 +310,7 @@ class KwhData: False: "N", None: "N", "Y": "Y", - "N": "N" + "N": "N", } for v in bools_to_remap: epc[v] = bool_map[epc[v]] diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py new file mode 100644 index 00000000..78460f5a --- /dev/null +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -0,0 +1,442 @@ +import json +import os + +import pandas as pd + +from datetime import datetime + + +def years_between(d1, d2): + # precise year difference (accounts for months/days) + return (d1.year - 
d2.year) - ((d1.month, d1.day) < (d2.month, d2.day)) + + +def get_element(elements, label): + """Safely get an element dict by display label (your JSON keys).""" + return elements.get(label) + + +def adequacy_result_by_text(attr_desc: str): + """ + Generic adequacy parser. + Pass if description clearly says 'Adequate' and not 'Inadequate'. + Fail if it says 'Inadequate' (or equivalent). + Unknown -> 'no_data' + """ + if not attr_desc or not isinstance(attr_desc, str): + return "no_data" + text = attr_desc.strip().lower() + # Common patterns + if "inadequate" in text or "unsatisfactory" in text or "problems" in text: + return "fail" + if "adequate" in text or "standard" in text or "appropriate" in text: + return "pass" + return "no_data" + + +def append_result(decent_homes, variable, result, install_date=None): + decent_homes.append({ + "variable": variable, + "result": result, + "hhsrs_rank": None, + "hhsrs_score": None, + "install_date": install_date + }) + + +# Read in static json, which is transformed by Jun-te's script +folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest/Decent Homes Pilot" +filenames = ["flat 1.json", "house 1.json"] + +houses_waltham_forest_data = pd.read_excel( + os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), + sheet_name="Houses Asset Data" +) +flats_waltham_forest_data = pd.read_excel( + os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), + sheet_name="CHINGFORD ROAD 236-254 Asset Bl" +) + +# Standardised variables which will form the enums in the db +HHSRS_VARIABLES = [ + "damp_and_mould_growth", + "excess_cold", + "excess_heat", + "asbestos_and_mm_fibres", + "biocides", + "carbon_monoxide_and_fuel_combustion_products", + "lead", + "radiation", + "uncombusted_fuel_gas", + "volatile_organic_compounds", + "crowding_and_space", + "entry_by_intruders", + "lighting", + "noise", + "domestic_hygiene_pests_and_refuse", + "food_safety", + 
"personal_hygiene_sanitation_and_drainage", + "water_supply", + "falls_associated_with_baths", + "falls_on_level_surfaces", + "falls_on_stairs_and_steps", + "falls_between_levels", + "electrical_hazards", + "fire", + "flames_hot_surfaces_and_materials", + "collision_and_entrapment", + "explosions", + "ergonomics", + "structural_collapse_and_falling_elements" +] + +CRITERION_B_VARIABLES = [ + "external_walls_structure", "lintels", "brickwork_spalling", "wall_finish", "roof_structure", "roof_finish", + "chimneys", "windows", "external_doors", "kitchens", "bathrooms", "central_heating_boiler", + "central_heating_distribution_system", "heating_other", "electrical_systems", +] + +CRITERION_C_VARIABLES = [ + "kitchen_facilities", +] + +# Criterion C explicit age limits (different from component lifespans used elsewhere) +CRITERION_C_AGE_LIMITS = { + "kitchen_years_max": 20, + "bathroom_years_max": 30, +} + +# Field labels as they appear in your JSON (based on your code) +LABEL_KITCHEN = "Adequacy of Kitchen and Type in Property" +LABEL_BATHROOM = "Adequacy of Bathroom Location in Property" +LABEL_NOISE = "Adequacy of Noise Insulation in Property" +LABEL_COMMON_CIRC = "Circulation Space in Common Area" # flats only + +STANDARD_HHSRS_MAPPING = {"pass": "TYPRISK", "fail": "MODRISK", "no_data": "TOBEASSESS"} + +# Criterion A - mapping of HHSRS variables to Waltham forest element codes +HHSRS_MAPPING = { + "damp_and_mould_growth": {"HHSRSDAMP": STANDARD_HHSRS_MAPPING}, + "excess_cold": {"HHSRSCOLD": STANDARD_HHSRS_MAPPING}, + "excess_heat": {"HHSRSHEAT": STANDARD_HHSRS_MAPPING}, + "asbestos_and_mm_fibres": {"HHSRSASB": STANDARD_HHSRS_MAPPING}, + "biocides": {"HHSRSBIOC": STANDARD_HHSRS_MAPPING}, + "carbon_monoxide_and_fuel_combustion_products": { + "HHSRSCO": STANDARD_HHSRS_MAPPING, + "HHSRSSO2": STANDARD_HHSRS_MAPPING, + "HHSRSNO2": STANDARD_HHSRS_MAPPING + }, + "lead": {"HHSRSLEAD": STANDARD_HHSRS_MAPPING}, + "radiation": {"HHSRSRADIA": STANDARD_HHSRS_MAPPING}, + 
"uncombusted_fuel_gas": {"HHSRSFUEL": STANDARD_HHSRS_MAPPING}, + "volatile_organic_compounds": {"HHSRSORGAN": STANDARD_HHSRS_MAPPING}, + "crowding_and_space": {"HHSRSCROWD": STANDARD_HHSRS_MAPPING}, + "entry_by_intruders": {"HHSRSENTRY": STANDARD_HHSRS_MAPPING}, + "lighting": {"HHSRSLIGHT": STANDARD_HHSRS_MAPPING}, + "noise": {"HHSRSNOISE": STANDARD_HHSRS_MAPPING}, + "domestic_hygiene_pests_and_refuse": {"HHSRSDOMES": STANDARD_HHSRS_MAPPING}, + "food_safety": {"HHSRSFOOD": STANDARD_HHSRS_MAPPING}, + "personal_hygiene_sanitation_and_drainage": {"HHSRSPERS": STANDARD_HHSRS_MAPPING}, + "water_supply": {"HHSRSWATER": STANDARD_HHSRS_MAPPING}, + "falls_associated_with_baths": {"HHSRSFBATH": STANDARD_HHSRS_MAPPING}, + "falls_on_level_surfaces": {"HHSRSFLEVE": STANDARD_HHSRS_MAPPING}, + "falls_on_stairs_and_steps": {"HHSRSFSTAI": STANDARD_HHSRS_MAPPING}, + "falls_between_levels": {"HHSRSFBETW": STANDARD_HHSRS_MAPPING}, + "electrical_hazards": {"HHSRSELEC": STANDARD_HHSRS_MAPPING}, + "fire": {"HHSRSFIRE": STANDARD_HHSRS_MAPPING}, + "flames_hot_surfaces_and_materials": {"HHSRSFLAME": STANDARD_HHSRS_MAPPING}, + "collision_and_entrapment": {"HHSRSENTRP": STANDARD_HHSRS_MAPPING, "HHSRSCLOW": STANDARD_HHSRS_MAPPING}, + "explosions": {"HHSRSEXPLO": STANDARD_HHSRS_MAPPING}, + "ergonomics": {"HHSRSPOSI": STANDARD_HHSRS_MAPPING}, + "structural_collapse_and_falling_elements": {"HHSRSSTRUC": STANDARD_HHSRS_MAPPING} +} + +print(houses_waltham_forest_data[ + houses_waltham_forest_data["ELEMENT CODE"] == "INTHTIMP" + ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +print(flats_waltham_forest_data[ + flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" + ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +# Criterion B +CRITERION_B_MAPPING = { + # TODO: Needs to be sorted!!! 
+ # "external_walls_structure": { + # "EXTWALLSTR": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown if Structural Defects in External Area"} + # } + "lintels": { + "EXTLINTELS": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown Condition of Lintels"} + } +} + +# Criterion C +CRITERION_C_MAPPING = { + # "kitchen_less_than_20_years_old": +} + +COMPONENT_LIFESPANS = { + "kitchen": {"house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30}, + "bathroom": {"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": 50} +} + +# Database design +# creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for +# components which expire, e.g. kitchen) + +decent_homes = [] +# Use to capture criterion A, B, C and D. Should be: +# {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, +# "criterion_d": bool, "decent_homes": bool"} +property_decent_homes = [] +for fn in filenames: + with open(os.path.join(folder, fn), "rb") as f: + data = json.load(f) + + from pprint import pprint + + pprint(data["elements"]) + + property_info = data["property_info"] + if property_info["PROP TYPE"] in ["HOU"]: + property_type = "house" + elif property_info["PROP TYPE"] == "FLA": + raise Exception("Implement distrinction between below and above 6 storeys") + property_type = "flat" + else: + raise NotImplementedError("Unknown property type") + + # Criterion A + for hhsrs_variable, mapping in HHSRS_MAPPING.items(): + element_code = list(mapping.keys())[0] + + # Find the data in the JSON within data["elements"] + check_pass = [] + for k, v in data["elements"].items(): + if v["ELEMENT CODE"] == element_code: + # We check the attribute code + # Check if pass + if v["ATTRIBUTE CODE"] == mapping[element_code]["pass"]: + result = "pass" + elif v["ATTRIBUTE CODE"] == mapping[element_code]["fail"]: + result = "fail" + elif v["ATTRIBUTE CODE"] == mapping[element_code]["no_data"]: + result 
= "no_data" + else: + raise ValueError("Unknown attribute code") + check_pass.append(result) + + # We check if we have a pass, fail or no_data + if all([x == "pass" for x in check_pass]): + hhsrs_result = "pass" + elif any([x == "fail" for x in check_pass]): + hhsrs_result = "fail" + elif any([x == "no_data" for x in check_pass]): + hhsrs_result = "no_data" + else: + raise NotImplementedError("Mixed results not implemented") + decent_homes.append( + {"variable": hhsrs_variable, 'result': hhsrs_result, "hhsrs_rank": None, "hhsrs_score": None, + "install_date": None} + ) + + # Criterion B + + # --- Criterion C --- + today = pd.Timestamp.today().normalize() + + # Guard: property type string already set earlier + is_flat = (property_info["PROP TYPE"] == "FLA") + + # 1) Kitchen age ≤ 20 years + kitchen = get_element(data["elements"], LABEL_KITCHEN) + if kitchen: + kit_install_raw = kitchen.get("INSTALL DATE") + try: + kit_install = pd.to_datetime(kit_install_raw) + kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) + kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" + # For transparency, store next renewal as install + 20 years (criterion C perspective) + kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) + except Exception: + kitchen_age_result = "no_data" + kit_next_due = None + else: + kitchen_age_result = "no_data" + kit_next_due = None + append_result(decent_homes, "kitchen_less_than_20_years_old", kitchen_age_result, kit_next_due) + + # 2) Kitchen adequate space/layout + # Prefer explicit codes if you have them, fall back to text in ATTRIBUTE CODE DESCRIPTION + if kitchen: + kit_attr_desc = kitchen.get("ATTRIBUTE CODE DESCRIPTION", "") + # If you prefer codes, you can also branch here on kitchen.get("ATTRIBUTE CODE") == "STDKITADQ" + kitchen_adequacy_result = adequacy_result_by_text(kit_attr_desc) + else: + kitchen_adequacy_result = 
"no_data" + append_result(decent_homes, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) + + # 3) Bathroom age ≤ 30 years + bath = get_element(data["elements"], LABEL_BATHROOM) + if bath: + bth_install_raw = bath.get("INSTALL DATE") + try: + bth_install = pd.to_datetime(bth_install_raw) + bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) + bathroom_age_result = "pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" + bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) + except Exception: + bathroom_age_result = "no_data" + bth_next_due = None + else: + bathroom_age_result = "no_data" + bth_next_due = None + append_result(decent_homes, "bathroom_less_than_30_years_old", bathroom_age_result, bth_next_due) + + # 4) Bathroom/WC appropriately located + if bath: + # You already observed codes like STDBTHADQ / ADPBTHADQ as 'pass' + bth_attr_code = bath.get("ATTRIBUTE CODE", "") + bth_attr_desc = bath.get("ATTRIBUTE CODE DESCRIPTION", "") + known_pass_codes = {"STDBTHADQ", "ADPBTHADQ"} + if bth_attr_code in known_pass_codes: + bathroom_location_result = "pass" + else: + # Fallback to text adequacy check + bathroom_location_result = adequacy_result_by_text(bth_attr_desc) + else: + bathroom_location_result = "no_data" + append_result(decent_homes, "bathroom_wc_appropriately_located", bathroom_location_result) + + # 5) Adequate external noise insulation + noise = get_element(data["elements"], LABEL_NOISE) + if noise: + noise_desc = noise.get("ATTRIBUTE CODE DESCRIPTION", "") + noise_result = adequacy_result_by_text(noise_desc) + else: + noise_result = "no_data" + append_result(decent_homes, "adequate_external_noise_insulation", noise_result) + + # 6) Adequate common entrance areas (flats only) + if is_flat: + raise Exception("Pls check this") + common = get_element(data["elements"], LABEL_COMMON_CIRC) + if common: + circ_desc = common.get("ATTRIBUTE CODE 
DESCRIPTION", "") + common_areas_result = adequacy_result_by_text(circ_desc) + else: + common_areas_result = "no_data" + append_result(decent_homes, "adequate_common_entrance_areas", common_areas_result) + + # ---------------- Criterion D ---------------- + # heating system type + heating = get_element(data["elements"], "Heating Improvement Required in Property") + if heating: + # Example: ATTRIBUTE CODE == "GOOD" means pass, "POOR" means fail + heat_type_code = heating.get("ATTRIBUTE CODE", "") + if heat_type_code in {"NOTAPPLIC"}: + heating_type_result = "pass" + elif heat_type_code in {"WETINSFULL"}: + heating_type_result = "fail" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Heating element missing in dataset") + + append_result(decent_homes, "efficient_heating_system_type", heating_type_result) + + # heating distribution + heating_dist = get_element(data["elements"], "Heating Distribution System in Property") + if heating_dist: + dist_desc = heating_dist.get("ATTRIBUTE CODE DESCRIPTION", "") + heating_dist_result = adequacy_result_by_text(dist_desc) + else: + raise NotImplementedError("Heating distribution element missing in dataset") + + append_result(decent_homes, "efficient_heating_distribution", heating_dist_result) + + # insulation + loft = get_element(data["elements"], "Size in mm of Loft Insulation Thickness in Property") + wall = get_element(data["elements"], "Wall Insulation Improvement in External Area") + heating = get_element(data["elements"], "Heating Improvement Required in Property") + # To determine how much loft insulation is required + + # Loft insulation check (example threshold: ≥ 270mm = pass) + if loft: + # We have a specific code, where further loft insulation is needed + loft_code = loft.get("ATTRIBUTE CODE", "") + if loft_code == "LOFTINSRQD": + loft_result = "fail" + elif loft_code.isnumeric(): + loft_result = "pass" + else: + raise NotImplementedError("Unknown loft insulation 
code - pls check") + else: + raise NotImplementedError("Loft insulation data missing - pls check") + append_result(decent_homes, "loft_insulation_sufficient", loft_result) + + # Wall insulation check (simple adequacy parser) + if wall: + wall_desc = wall.get("ATTRIBUTE CODE DESCRIPTION", "") + wall_result = adequacy_result_by_text(wall_desc) + else: + raise NotImplementedError("Wall insulation data missing - pls check") + append_result(decent_homes, "wall_insulation_sufficient", wall_result) + + # ---------------- Criterion A overall ---------------- + a_vars = set(HHSRS_MAPPING.keys()) + latest_a_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in a_vars} + + if any(v == "fail" for v in latest_a_results.values()): + criterion_a_result = "fail" + elif all(v == "pass" for v in latest_a_results.values()): + criterion_a_result = "pass" + else: + criterion_a_result = "no_data" + + # ---------------- Criterion C overall ---------------- + criterion_c_vars = [ + "kitchen_less_than_20_years_old", + "kitchen_adequate_space_and_layout", + "bathroom_less_than_30_years_old", + "bathroom_wc_appropriately_located", + "adequate_external_noise_insulation", + ] + if is_flat: + criterion_c_vars.append("adequate_common_entrance_areas") + + latest_c_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_c_vars} + + count_fails = sum(1 for v in latest_c_results.values() if v == "fail") + # optionally count no_data too if you want strict interpretation + criterion_c_result = "fail" if count_fails >= 3 else "pass" + + # ---------------- Criterion D overall ---------------- + criterion_d_vars = [ + "efficient_heating_system_type", + "efficient_heating_distribution", + "loft_insulation_sufficient", + "wall_insulation_sufficient", + ] + latest_d_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_d_vars} + + if any(v == "fail" for v in latest_d_results.values()): + criterion_d_result = 
"fail" + elif all(v == "pass" for v in latest_d_results.values()): + criterion_d_result = "pass" + else: + criterion_d_result = "no_data" + + # ---------------- Append to property_decent_homes ---------------- + property_decent_homes.append({ + "uprn": property_info.get("UPRN"), # update field name if needed + "creation_date": datetime.now().date().isoformat(), + "criterion_a": criterion_a_result, + "criterion_b": None, # not yet implemented + "criterion_c": criterion_c_result, + "criterion_d": criterion_d_result, + "decent_homes": ( + criterion_a_result == "pass" + and criterion_c_result == "pass" + ) + }) From a5ae1669718ac1fd6b17fba13678920534d17ea1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 12:32:51 +0100 Subject: [PATCH 004/202] mvp implementation for A, C, D, year mapping for B --- .../waltham_forest/decent_homes_pilot.py | 367 ++++++++++++------ 1 file changed, 254 insertions(+), 113 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index 78460f5a..b59168fb 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -16,26 +16,8 @@ def get_element(elements, label): return elements.get(label) -def adequacy_result_by_text(attr_desc: str): - """ - Generic adequacy parser. - Pass if description clearly says 'Adequate' and not 'Inadequate'. - Fail if it says 'Inadequate' (or equivalent). 
- Unknown -> 'no_data' - """ - if not attr_desc or not isinstance(attr_desc, str): - return "no_data" - text = attr_desc.strip().lower() - # Common patterns - if "inadequate" in text or "unsatisfactory" in text or "problems" in text: - return "fail" - if "adequate" in text or "standard" in text or "appropriate" in text: - return "pass" - return "no_data" - - -def append_result(decent_homes, variable, result, install_date=None): - decent_homes.append({ +def append_result(decent_homes_meta, variable, result, install_date=None): + decent_homes_meta.append({ "variable": variable, "result": result, "hhsrs_rank": None, @@ -97,7 +79,8 @@ CRITERION_B_VARIABLES = [ ] CRITERION_C_VARIABLES = [ - "kitchen_facilities", + "kitchen_less_than_20_years_old", "kitchen_adequate_space_and_layout", "bathroom_less_than_30_years_old", + "bathroom_wc_appropriately_located", "adequate_external_noise_insulation", "adequate_common_entrance_areas", ] # Criterion C explicit age limits (different from component lifespans used elsewhere) @@ -151,40 +134,163 @@ HHSRS_MAPPING = { "structural_collapse_and_falling_elements": {"HHSRSSTRUC": STANDARD_HHSRS_MAPPING} } -print(houses_waltham_forest_data[ - houses_waltham_forest_data["ELEMENT CODE"] == "INTHTIMP" - ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) +# print(houses_waltham_forest_data[ +# houses_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" +# ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +# print(flats_waltham_forest_data[ +# flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" +# ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) -print(flats_waltham_forest_data[ - flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" - ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) # Criterion B -CRITERION_B_MAPPING = { - # TODO: Needs to be sorted!!! 
- # "external_walls_structure": { - # "EXTWALLSTR": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown if Structural Defects in External Area"} - # } - "lintels": { - "EXTLINTELS": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown Condition of Lintels"} - } +B_COMPONENT_LABELS = { + # Key components + "wall_structure": [ + "Wall Structure in External Area", + ], + "lintels": [ + "Lintels in External Area", + ], + "brickwork_spalling": [ + "Wall Spalling in External Area", + ], + "wall_finish": [ + "Wall Finish 1 in External Area", + "Wall Finish 2 in External Area", + "External Decorations in External Area", + "Brickwork Pointing in External Area", + ], + "roof_structure": [ + "Roof Structure 1 in External Area", + "Roof Structure 2 in External Area", + "Roof Structure 3 in External Area", + # If you later decide to include ancillary items, add: + # "Fascia / Soffit / Bargeboard in External Area", + # "Gutters in External Area", "Downpipes in External Area", + # "Internal Downpipes in External Area", + # and give them a clear condition rule. + ], + "roof_finish": [ + "Roof Covering 1 in External Area", + "Roof Covering 2 in External Area", + "Roof Covering 3 in External Area", + ], + "chimneys": [ + "Chimneys in External Area", + ], + "windows": [ + "Windows in Property", + "Windows 1 in External Area", + "Windows 2 in External Area", + "Garage and Store Windows in External Area", + "Garage Windows in External Area", + "Store Windows in External Area", + ], + "external_doors": [ + "Type and Location of Front Door in Property", + "Front Door Fire Rating in Property", + "Patio and French Doors 1 in External Area", + "Back and Side Doors 1 in External Area", + "Back and Side Doors 2 in External Area", + "Garage and Store Doors in External Area", + "Garage Door in External Area", + "Store Door in External Area", + ], + "central_heating_boiler": [ + # If the dataset exposes a specific boiler element, put it here. 
+ # For now we only have "Heating Improvement Required in Property" elsewhere (Criterion D), + # which isn't reliable for age. If your JSON later includes a boiler line with INSTALL DATE, + # add its label here. + ], + "heating_other": [ + # e.g., gas fires/storage heaters if present as discrete elements later. + ], + "electrical_systems": [ + # If you have an installation line with dates (e.g. "Electrics Required in Property") + # add it here; we will rely on INSTALL DATE + REMAINING LIFE. + "Electrics Required in Property", + ], + + # Other components + "kitchen": [ + "Adequacy of Kitchen and Type in Property", + ], + "bathroom": [ + "Adequacy of Bathroom Location in Property", + ], + "central_heating_distribution_system": [ + "Heating Distribution System in Property", + ], +} + +KEY_COMPONENTS = { + "wall_structure", "lintels", "brickwork_spalling", "wall_finish", + "roof_structure", "roof_finish", "chimneys", "windows", + "external_doors", "central_heating_boiler", "heating_other", + "electrical_systems", +} +OTHER_COMPONENTS = { + "kitchen", "bathroom", "central_heating_distribution_system", } # Criterion C -CRITERION_C_MAPPING = { - # "kitchen_less_than_20_years_old": -} - COMPONENT_LIFESPANS = { - "kitchen": {"house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30}, - "bathroom": {"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": 50} + # Key components + "wall_structure": { + "house": 80, "flat_below_6_storeys": 80, "flat_above_6_storeys": 80 + }, + "lintels": { + "house": 60, "flat_below_6_storeys": 60, "flat_above_6_storeys": 60 + }, + "brickwork_spalling": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "wall_finish": { + "house": 60, "flat_below_6_storeys": 60, "flat_above_6_storeys": 30 + }, + "roof_structure": { + "house": 50, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "roof_finish": { + "house": 50, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "chimneys": { + 
"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": None # N/A + }, + "windows": { + "house": 40, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "external_doors": { + "house": 40, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "central_heating_boiler": { + "house": 15, "flat_below_6_storeys": 15, "flat_above_6_storeys": 15 + }, + "heating_other": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "electrical_systems": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + + # Other components + "kitchen": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "bathroom": { + "house": 40, "flat_below_6_storeys": 40, "flat_above_6_storeys": 40 + }, + "central_heating_distribution_system": { + "house": 40, "flat_below_6_storeys": 40, "flat_above_6_storeys": 40 + }, } # Database design # creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for # components which expire, e.g. kitchen) -decent_homes = [] +decent_homes_meta = [] # Use to capture criterion A, B, C and D. 
Should be: # {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, # "criterion_d": bool, "decent_homes": bool"} @@ -193,20 +299,16 @@ for fn in filenames: with open(os.path.join(folder, fn), "rb") as f: data = json.load(f) - from pprint import pprint - - pprint(data["elements"]) - property_info = data["property_info"] if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" elif property_info["PROP TYPE"] == "FLA": raise Exception("Implement distrinction between below and above 6 storeys") - property_type = "flat" + # property_type = "flat" else: raise NotImplementedError("Unknown property type") - # Criterion A + # ---------------- Criterion A ---------------- for hhsrs_variable, mapping in HHSRS_MAPPING.items(): element_code = list(mapping.keys())[0] @@ -235,14 +337,48 @@ for fn in filenames: hhsrs_result = "no_data" else: raise NotImplementedError("Mixed results not implemented") - decent_homes.append( + decent_homes_meta.append( {"variable": hhsrs_variable, 'result': hhsrs_result, "hhsrs_rank": None, "hhsrs_score": None, "install_date": None} ) - # Criterion B + # ---------------- Criterion B ---------------- + # Check each of the components - # --- Criterion C --- + component_pass_or_fail = [] + # TODO: Delete me + component, labels = list(B_COMPONENT_LABELS.items())[1] + for component, labels in B_COMPONENT_LABELS.items(): + # TODO: labels may not need to be multiple variables + for label in labels: + # Grab the label + label_data = get_element(data["elements"], label) + # 1) We check if the component is old + install_date = pd.to_datetime(label_data["INSTALL DATE"]) + if pd.isnull(install_date): + raise ValueError("Missing install date - pls check") + component_lifetime = COMPONENT_LIFESPANS[component][property_type] + # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly + is_old = years_between(today.to_pydatetime(), 
install_date.to_pydatetime()) >= component_lifetime + # 2) We check if the component is in poor condition + if pd.isnull(label_data["REMAINING LIFE"]): + raise ValueError("Missing remaining life - pls check") + has_failed = label_data["REMAINING LIFE"] < 0 + # The component needs to have both failed and be old to fail criterion B + component_result = "fail" if is_old and has_failed else "pass" + component_pass_or_fail.append( + { + "component": component, + "label": label, + "install_date": str(install_date), + "remaining_life": label_data["REMAINING LIFE"], + "is_old": is_old, + "has_failed": has_failed, + "result": component_result + } + ) + + # ---------------- Criterion C ---------------- today = pd.Timestamp.today().normalize() # Guard: property type string already set earlier @@ -251,71 +387,67 @@ for fn in filenames: # 1) Kitchen age ≤ 20 years kitchen = get_element(data["elements"], LABEL_KITCHEN) if kitchen: - kit_install_raw = kitchen.get("INSTALL DATE") - try: - kit_install = pd.to_datetime(kit_install_raw) - kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) - kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" - # For transparency, store next renewal as install + 20 years (criterion C perspective) - kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) - except Exception: - kitchen_age_result = "no_data" - kit_next_due = None + kit_install_raw = kitchen["INSTALL DATE"] + kit_install = pd.to_datetime(kit_install_raw) + kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) + kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" + # For transparency, store next renewal as install + 20 years (criterion C perspective) + kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) else: - kitchen_age_result = "no_data" - 
kit_next_due = None - append_result(decent_homes, "kitchen_less_than_20_years_old", kitchen_age_result, kit_next_due) + raise NotImplementedError("Kitchen data missing - pls check") + append_result( + decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, install_date=str(kit_install) + ) # 2) Kitchen adequate space/layout # Prefer explicit codes if you have them, fall back to text in ATTRIBUTE CODE DESCRIPTION if kitchen: - kit_attr_desc = kitchen.get("ATTRIBUTE CODE DESCRIPTION", "") - # If you prefer codes, you can also branch here on kitchen.get("ATTRIBUTE CODE") == "STDKITADQ" - kitchen_adequacy_result = adequacy_result_by_text(kit_attr_desc) + kit_attr_desc = kitchen["ATTRIBUTE CODE"] + if kit_attr_desc == "STDKITADQ": + kitchen_adequacy_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") else: - kitchen_adequacy_result = "no_data" - append_result(decent_homes, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) + raise NotImplementedError("Kitchen data missing - pls check") + append_result(decent_homes_meta, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) # 3) Bathroom age ≤ 30 years bath = get_element(data["elements"], LABEL_BATHROOM) if bath: - bth_install_raw = bath.get("INSTALL DATE") - try: - bth_install = pd.to_datetime(bth_install_raw) - bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) - bathroom_age_result = "pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" - bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) - except Exception: - bathroom_age_result = "no_data" - bth_next_due = None + bth_install_raw = bath["INSTALL DATE"] + bth_install = pd.to_datetime(bth_install_raw) + bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) + bathroom_age_result = "pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" + 
bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) else: - bathroom_age_result = "no_data" - bth_next_due = None - append_result(decent_homes, "bathroom_less_than_30_years_old", bathroom_age_result, bth_next_due) + raise NotImplementedError("Bathroom data missing - pls check") + append_result( + decent_homes_meta, "bathroom_less_than_30_years_old", bathroom_age_result, install_date=str(bth_install) + ) # 4) Bathroom/WC appropriately located if bath: - # You already observed codes like STDBTHADQ / ADPBTHADQ as 'pass' - bth_attr_code = bath.get("ATTRIBUTE CODE", "") - bth_attr_desc = bath.get("ATTRIBUTE CODE DESCRIPTION", "") - known_pass_codes = {"STDBTHADQ", "ADPBTHADQ"} - if bth_attr_code in known_pass_codes: + bth_attr_code = bath["ATTRIBUTE CODE"] + if bth_attr_code in {"STDBTHADQ", "ADPBTHADQ"}: bathroom_location_result = "pass" else: - # Fallback to text adequacy check - bathroom_location_result = adequacy_result_by_text(bth_attr_desc) + raise NotImplementedError("No other observed codes yet") else: - bathroom_location_result = "no_data" - append_result(decent_homes, "bathroom_wc_appropriately_located", bathroom_location_result) + raise NotImplementedError("Bathroom data missing - pls check") + + append_result(decent_homes_meta, "bathroom_wc_appropriately_located", bathroom_location_result) # 5) Adequate external noise insulation noise = get_element(data["elements"], LABEL_NOISE) if noise: - noise_desc = noise.get("ATTRIBUTE CODE DESCRIPTION", "") - noise_result = adequacy_result_by_text(noise_desc) + noise_code = noise["ATTRIBUTE CODE"] + if noise_code in {"ADEQUATE"}: + noise_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") else: - noise_result = "no_data" - append_result(decent_homes, "adequate_external_noise_insulation", noise_result) + raise NotImplementedError("Noise insulation data missing - pls check") + append_result(decent_homes_meta, "adequate_external_noise_insulation", 
noise_result) # 6) Adequate common entrance areas (flats only) if is_flat: @@ -326,14 +458,13 @@ for fn in filenames: common_areas_result = adequacy_result_by_text(circ_desc) else: common_areas_result = "no_data" - append_result(decent_homes, "adequate_common_entrance_areas", common_areas_result) + append_result(decent_homes_meta, "adequate_common_entrance_areas", common_areas_result) # ---------------- Criterion D ---------------- # heating system type heating = get_element(data["elements"], "Heating Improvement Required in Property") if heating: - # Example: ATTRIBUTE CODE == "GOOD" means pass, "POOR" means fail - heat_type_code = heating.get("ATTRIBUTE CODE", "") + heat_type_code = heating["ATTRIBUTE CODE"] if heat_type_code in {"NOTAPPLIC"}: heating_type_result = "pass" elif heat_type_code in {"WETINSFULL"}: @@ -343,28 +474,33 @@ for fn in filenames: else: raise NotImplementedError("Heating element missing in dataset") - append_result(decent_homes, "efficient_heating_system_type", heating_type_result) + append_result(decent_homes_meta, "efficient_heating_system_type", heating_type_result) # heating distribution heating_dist = get_element(data["elements"], "Heating Distribution System in Property") if heating_dist: - dist_desc = heating_dist.get("ATTRIBUTE CODE DESCRIPTION", "") - heating_dist_result = adequacy_result_by_text(dist_desc) + dist_code = heating_dist["ATTRIBUTE CODE"] + if dist_code == "UNKNOWN": + # For the observed case, there was no heating and wet heating needed to be installed in full so the value + # was unknown + heating_dist_result = "no_data" + else: + raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Heating distribution element missing in dataset") - append_result(decent_homes, "efficient_heating_distribution", heating_dist_result) + append_result(decent_homes_meta, "efficient_heating_distribution", heating_dist_result) # insulation loft = get_element(data["elements"], "Size in mm of Loft Insulation 
Thickness in Property") wall = get_element(data["elements"], "Wall Insulation Improvement in External Area") - heating = get_element(data["elements"], "Heating Improvement Required in Property") # To determine how much loft insulation is required # Loft insulation check (example threshold: ≥ 270mm = pass) if loft: - # We have a specific code, where further loft insulation is needed - loft_code = loft.get("ATTRIBUTE CODE", "") + # We have a specific code, where further loft insulation is needed - It appears the heating type check has + # already been completed in this dataset and so we just need to check the code + loft_code = loft["ATTRIBUTE CODE"] if loft_code == "LOFTINSRQD": loft_result = "fail" elif loft_code.isnumeric(): @@ -373,19 +509,22 @@ for fn in filenames: raise NotImplementedError("Unknown loft insulation code - pls check") else: raise NotImplementedError("Loft insulation data missing - pls check") - append_result(decent_homes, "loft_insulation_sufficient", loft_result) + append_result(decent_homes_meta, "loft_insulation_sufficient", loft_result) - # Wall insulation check (simple adequacy parser) + # Wall insulation check if wall: - wall_desc = wall.get("ATTRIBUTE CODE DESCRIPTION", "") - wall_result = adequacy_result_by_text(wall_desc) + wall_code = wall["ATTRIBUTE CODE"] + if wall_code in {"NONE"}: # Means no insulation improvement required + wall_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Wall insulation data missing - pls check") - append_result(decent_homes, "wall_insulation_sufficient", wall_result) + append_result(decent_homes_meta, "wall_insulation_sufficient", wall_result) # ---------------- Criterion A overall ---------------- a_vars = set(HHSRS_MAPPING.keys()) - latest_a_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in a_vars} + latest_a_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in a_vars} if any(v 
== "fail" for v in latest_a_results.values()): criterion_a_result = "fail" @@ -405,20 +544,21 @@ for fn in filenames: if is_flat: criterion_c_vars.append("adequate_common_entrance_areas") - latest_c_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_c_vars} + latest_c_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in criterion_c_vars} count_fails = sum(1 for v in latest_c_results.values() if v == "fail") # optionally count no_data too if you want strict interpretation criterion_c_result = "fail" if count_fails >= 3 else "pass" # ---------------- Criterion D overall ---------------- + # Needs to have both efficient geating and distribution so all should pass criterion_d_vars = [ "efficient_heating_system_type", "efficient_heating_distribution", "loft_insulation_sufficient", "wall_insulation_sufficient", ] - latest_d_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_d_vars} + latest_d_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in criterion_d_vars} if any(v == "fail" for v in latest_d_results.values()): criterion_d_result = "fail" @@ -429,7 +569,7 @@ for fn in filenames: # ---------------- Append to property_decent_homes ---------------- property_decent_homes.append({ - "uprn": property_info.get("UPRN"), # update field name if needed + "uprn": property_info.get("UPRN"), # TODO: Need UPRN "creation_date": datetime.now().date().isoformat(), "criterion_a": criterion_a_result, "criterion_b": None, # not yet implemented @@ -438,5 +578,6 @@ for fn in filenames: "decent_homes": ( criterion_a_result == "pass" and criterion_c_result == "pass" + and criterion_d_result == "pass" ) }) From d68ef88b9db7735a55a74732a58dabe5f3ff8463 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 14:57:58 +0100 Subject: [PATCH 005/202] wip --- .../waltham_forest/decent_homes_pilot.py | 107 ++++++++++++++---- 1 file 
changed, 85 insertions(+), 22 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index b59168fb..ba9bb3b7 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -5,6 +5,8 @@ import pandas as pd from datetime import datetime +from docutils.nodes import table + def years_between(d1, d2): # precise year difference (accounts for months/days) @@ -16,13 +18,14 @@ def get_element(elements, label): return elements.get(label) -def append_result(decent_homes_meta, variable, result, install_date=None): +def append_result(decent_homes_meta, variable, result, install_date=None, expiry_date=None): decent_homes_meta.append({ "variable": variable, "result": result, "hhsrs_rank": None, "hhsrs_score": None, - "install_date": install_date + "install_date": install_date, + "expiry_date": expiry_date, }) @@ -165,11 +168,13 @@ B_COMPONENT_LABELS = { "Roof Structure 1 in External Area", "Roof Structure 2 in External Area", "Roof Structure 3 in External Area", - # If you later decide to include ancillary items, add: - # "Fascia / Soffit / Bargeboard in External Area", - # "Gutters in External Area", "Downpipes in External Area", - # "Internal Downpipes in External Area", - # and give them a clear condition rule. + "Garage Roof in External Area", + "Garage and Store Roofs in External Area", + "Store Roof in External Area", + "Fascia / Soffit / Bargeboard in External Area", + "Gutters in External Area", + "Downpipes in External Area", + "Internal Downpipes in External Area" ], "roof_finish": [ "Roof Covering 1 in External Area", @@ -198,20 +203,15 @@ B_COMPONENT_LABELS = { "Store Door in External Area", ], "central_heating_boiler": [ - # If the dataset exposes a specific boiler element, put it here. - # For now we only have "Heating Improvement Required in Property" elsewhere (Criterion D), - # which isn't reliable for age. 
If your JSON later includes a boiler line with INSTALL DATE, - # add its label here. + # TODO ], "heating_other": [ - # e.g., gas fires/storage heaters if present as discrete elements later. + # TODO ], "electrical_systems": [ - # If you have an installation line with dates (e.g. "Electrics Required in Property") - # add it here; we will rely on INSTALL DATE + REMAINING LIFE. + # TODO "Electrics Required in Property", ], - # Other components "kitchen": [ "Adequacy of Kitchen and Type in Property", @@ -287,9 +287,10 @@ COMPONENT_LIFESPANS = { } # Database design -# creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for -# components which expire, e.g. kitchen) +# creation_date, uprn, variable, result (pass/fail/nodata), hhsrs_score (optional, numeric), hhsrs_rank (A-J), +# install_date (for components which expire, e.g. kitchen), remaining_life (for components which expire, e.g. kitchen), +# TODO: Add the criterion decent_homes_meta = [] # Use to capture criterion A, B, C and D. Should be: # {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, @@ -303,12 +304,15 @@ for fn in filenames: if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" elif property_info["PROP TYPE"] == "FLA": - raise Exception("Implement distrinction between below and above 6 storeys") + raise NotImplementedError("Implement distrinction between below and above 6 storeys") # property_type = "flat" else: raise NotImplementedError("Unknown property type") # ---------------- Criterion A ---------------- + # TODO: Map out the sub-information + # Critrion A: pass/fail + # If fail, why? 
for hhsrs_variable, mapping in HHSRS_MAPPING.items(): element_code = list(mapping.keys())[0] @@ -347,19 +351,36 @@ for fn in filenames: component_pass_or_fail = [] # TODO: Delete me - component, labels = list(B_COMPONENT_LABELS.items())[1] + component, labels = list(B_COMPONENT_LABELS.items())[9] + label = labels[0] + # TODO: need to handle the case where there is no survey data at all for a component for component, labels in B_COMPONENT_LABELS.items(): # TODO: labels may not need to be multiple variables for label in labels: # Grab the label label_data = get_element(data["elements"], label) + if label_data["ATTRIBUTE CODE"] in ["UNKNOWN", "NONE", "UNKNOWNG", "UNKNOWNS"]: + # This isn't applicable + component_pass_or_fail.append( + { + "component": component, + "label": label, + "install_date": None, + "remaining_life": None, + "is_old": False, + "has_failed": False, + "result": "pass", + "appliable": False + } + ) + continue # 1) We check if the component is old install_date = pd.to_datetime(label_data["INSTALL DATE"]) if pd.isnull(install_date): raise ValueError("Missing install date - pls check") component_lifetime = COMPONENT_LIFESPANS[component][property_type] # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly - is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) >= component_lifetime + is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) > component_lifetime # 2) We check if the component is in poor condition if pd.isnull(label_data["REMAINING LIFE"]): raise ValueError("Missing remaining life - pls check") @@ -369,15 +390,54 @@ for fn in filenames: component_pass_or_fail.append( { "component": component, + "component_type": "key" if component in KEY_COMPONENTS else "other", + "component_sub_description": label_data["ATTRIBUTE CODE DESCRIPTION"], "label": label, "install_date": str(install_date), "remaining_life": label_data["REMAINING LIFE"], "is_old": 
is_old, "has_failed": has_failed, - "result": component_result + "result": component_result, + "appliable": True } ) + # TODO: We need to check by component + # Example of a pass for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + + # Example of a fail for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "fail"}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + + # Example of a no data for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "nodata", "appliable": True}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + # OR + # Everything is unknown + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass", "appliable": False}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass", "appliable": False}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass", "appliable": False}, + # ] + + # Component 1: pass/fail, key: true/False + # Component 2: pass/fail, key: true/False + # Component 3: pass/fail, key: true/False + # Component 4: pass/fail, key: true/False + # Component 4: pass/fail, key: true/False + # -> Decide on outcome. 
If failure of 1 key component -> fail criterion B, or 2 other components -> fail criterion B + # ---------------- Criterion C ---------------- today = pd.Timestamp.today().normalize() @@ -396,7 +456,8 @@ for fn in filenames: else: raise NotImplementedError("Kitchen data missing - pls check") append_result( - decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, install_date=str(kit_install) + decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, + install_date=str(kit_install), expiry_date=str(kit_next_due) ) # 2) Kitchen adequate space/layout @@ -533,6 +594,8 @@ for fn in filenames: else: criterion_a_result = "no_data" + # ---------------- Criterion B overall ---------------- + # ---------------- Criterion C overall ---------------- criterion_c_vars = [ "kitchen_less_than_20_years_old", From a22db51be9ffedd782c38f4120b4cb5e26e23919 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 22:33:31 +0100 Subject: [PATCH 006/202] pilot implementation --- .../waltham_forest/decent_homes_pilot.py | 319 ++++++++++++------ 1 file changed, 215 insertions(+), 104 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index ba9bb3b7..33836236 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -18,9 +18,11 @@ def get_element(elements, label): return elements.get(label) -def append_result(decent_homes_meta, variable, result, install_date=None, expiry_date=None): +def append_result(decent_homes_meta, criteria, variable, sub_variable, result, install_date=None, expiry_date=None): decent_homes_meta.append({ + "criteria": criteria, "variable": variable, + "sub_variable": sub_variable, "result": result, "hhsrs_rank": None, "hhsrs_score": None, @@ -75,6 +77,44 @@ HHSRS_VARIABLES = [ "structural_collapse_and_falling_elements" ] +ELEMENT_CODE_TO_DESCRIPTION = { + # One-to-one + 
"HHSRSDAMP": "damp_and_mould_growth", + "HHSRSCOLD": "excess_cold", + "HHSRSHEAT": "excess_heat", + "HHSRSASB": "asbestos_and_mm_fibres", + "HHSRSBIOC": "biocides", + "HHSRSLEAD": "lead", + "HHSRSRADIA": "radiation", + "HHSRSFUEL": "uncombusted_fuel_gas", + "HHSRSORGAN": "volatile_organic_compounds", + "HHSRSCROWD": "crowding_and_space", + "HHSRSENTRY": "entry_by_intruders", + "HHSRSLIGHT": "lighting", + "HHSRSNOISE": "noise", + "HHSRSDOMES": "domestic_hygiene_pests_and_refuse", + "HHSRSFOOD": "food_safety", + "HHSRSPERS": "personal_hygiene_sanitation_and_drainage", + "HHSRSWATER": "water_supply", + "HHSRSFBATH": "falls_associated_with_baths", + "HHSRSFLEVE": "falls_on_level_surfaces", + "HHSRSFSTAI": "falls_on_stairs_and_steps", + "HHSRSFBETW": "falls_between_levels", + "HHSRSELEC": "electrical_hazards", + "HHSRSFIRE": "fire", + "HHSRSFLAME": "flames_hot_surfaces_and_materials", + "HHSRSEXPLO": "explosions", + "HHSRSPOSI": "ergonomics", + "HHSRSSTRUC": "structural_collapse_and_falling_elements", + + # One-to-many expansions + "HHSRSCO": "carbon_monoxide", + "HHSRSSO2": "sulphur_dioxide_and_smoke", + "HHSRSNO2": "nitrogen_dioxide", + "HHSRSENTRP": "collision_and_entrapment", + "HHSRSCLOW": "collision_hazards_and_low_headroom", +} + CRITERION_B_VARIABLES = [ "external_walls_structure", "lintels", "brickwork_spalling", "wall_finish", "roof_structure", "roof_finish", "chimneys", "windows", "external_doors", "kitchens", "bathrooms", "central_heating_boiler", @@ -203,13 +243,16 @@ B_COMPONENT_LABELS = { "Store Door in External Area", ], "central_heating_boiler": [ - # TODO + # "Heating Improvement Required in Property", + "Boiler Fuel in Property", + "Type of Water Heating in Property", ], "heating_other": [ - # TODO + # "Heating Distribution System in Property" + "Boiler Fuel in Property", + "Type of Water Heating in Property", ], "electrical_systems": [ - # TODO "Electrics Required in Property", ], # Other components @@ -300,6 +343,8 @@ for fn in filenames: with 
open(os.path.join(folder, fn), "rb") as f: data = json.load(f) + today = pd.Timestamp.today().normalize() + property_info = data["property_info"] if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" @@ -310,7 +355,6 @@ for fn in filenames: raise NotImplementedError("Unknown property type") # ---------------- Criterion A ---------------- - # TODO: Map out the sub-information # Critrion A: pass/fail # If fail, why? for hhsrs_variable, mapping in HHSRS_MAPPING.items(): @@ -331,115 +375,97 @@ for fn in filenames: else: raise ValueError("Unknown attribute code") check_pass.append(result) + append_result( + decent_homes_meta, + criteria="A", + variable=hhsrs_variable, + sub_variable=ELEMENT_CODE_TO_DESCRIPTION[element_code], + result=result, + install_date=None, + expiry_date=None, + ) # We check if we have a pass, fail or no_data - if all([x == "pass" for x in check_pass]): - hhsrs_result = "pass" - elif any([x == "fail" for x in check_pass]): - hhsrs_result = "fail" - elif any([x == "no_data" for x in check_pass]): - hhsrs_result = "no_data" - else: - raise NotImplementedError("Mixed results not implemented") - decent_homes_meta.append( - {"variable": hhsrs_variable, 'result': hhsrs_result, "hhsrs_rank": None, "hhsrs_score": None, - "install_date": None} - ) + # if all([x == "pass" for x in check_pass]): + # hhsrs_result = "pass" + # elif any([x == "fail" for x in check_pass]): + # hhsrs_result = "fail" + # elif any([x == "no_data" for x in check_pass]): + # hhsrs_result = "no_data" + # else: + # raise NotImplementedError("Mixed results not implemented") # ---------------- Criterion B ---------------- # Check each of the components - component_pass_or_fail = [] - # TODO: Delete me - component, labels = list(B_COMPONENT_LABELS.items())[9] - label = labels[0] - # TODO: need to handle the case where there is no survey data at all for a component + # ---------------- Criterion B ---------------- + property_boiler = get_element(data["elements"], "Boiler Fuel in 
Property") + for component, labels in B_COMPONENT_LABELS.items(): - # TODO: labels may not need to be multiple variables for label in labels: - # Grab the label label_data = get_element(data["elements"], label) + + # Handle no-data or not-applicable if label_data["ATTRIBUTE CODE"] in ["UNKNOWN", "NONE", "UNKNOWNG", "UNKNOWNS"]: - # This isn't applicable - component_pass_or_fail.append( - { - "component": component, - "label": label, - "install_date": None, - "remaining_life": None, - "is_old": False, - "has_failed": False, - "result": "pass", - "appliable": False - } - ) + # append_result( + # decent_homes_meta, + # criteria="B", + # variable=component, + # sub_variable=label, + # result="pass", + # install_date=None, + # expiry_date=None, + # ) continue - # 1) We check if the component is old + + # Special skip conditions for heating + no_boiler_condition = ( + property_boiler["ATTRIBUTE CODE"] in ["NONENOCH"] + and component == "central_heating_boiler" + ) + other_heating_condition = ( + label_data["ATTRIBUTE CODE"] in ["NONENOCH"] + and component == "heating_other" + ) + if no_boiler_condition or other_heating_condition: + # append_result( + # decent_homes_meta, + # criteria="B", + # variable=component, + # sub_variable=label, + # result="pass", + # install_date=None, + # expiry_date=None, + # ) + continue + + # Normal case: evaluate install date + lifetime + remaining life install_date = pd.to_datetime(label_data["INSTALL DATE"]) if pd.isnull(install_date): - raise ValueError("Missing install date - pls check") + raise ValueError(f"Missing install date for {component}/{label}") + component_lifetime = COMPONENT_LIFESPANS[component][property_type] - # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) > component_lifetime - # 2) We check if the component is in poor condition + if pd.isnull(label_data["REMAINING LIFE"]): - raise 
ValueError("Missing remaining life - pls check") + raise ValueError(f"Missing remaining life for {component}/{label}") has_failed = label_data["REMAINING LIFE"] < 0 - # The component needs to have both failed and be old to fail criterion B + + expiry_date = install_date + pd.DateOffset(years=component_lifetime) component_result = "fail" if is_old and has_failed else "pass" - component_pass_or_fail.append( - { - "component": component, - "component_type": "key" if component in KEY_COMPONENTS else "other", - "component_sub_description": label_data["ATTRIBUTE CODE DESCRIPTION"], - "label": label, - "install_date": str(install_date), - "remaining_life": label_data["REMAINING LIFE"], - "is_old": is_old, - "has_failed": has_failed, - "result": component_result, - "appliable": True - } + + # Push into decent_homes_meta + append_result( + decent_homes_meta, + criteria="B", + variable=component, + sub_variable=label, + result=component_result, + install_date=str(install_date), + expiry_date=str(expiry_date), ) - # TODO: We need to check by component - # Example of a pass for a component - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, - # ] - - # Example of a fail for a component - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "fail"}, - # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, - # ] - - # Example of a no data for a component - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "nodata", "appliable": True}, - # {"component": 
"external_walls", "component_type": "key", "descr": "C", "result": "pass"}, - # ] - # OR - # Everything is unknown - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass", "appliable": False}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass", "appliable": False}, - # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass", "appliable": False}, - # ] - - # Component 1: pass/fail, key: true/False - # Component 2: pass/fail, key: true/False - # Component 3: pass/fail, key: true/False - # Component 4: pass/fail, key: true/False - # Component 4: pass/fail, key: true/False - # -> Decide on outcome. If failure of 1 key component -> fail criterion B, or 2 other components -> fail criterion B - # ---------------- Criterion C ---------------- - today = pd.Timestamp.today().normalize() # Guard: property type string already set earlier is_flat = (property_info["PROP TYPE"] == "FLA") @@ -456,8 +482,13 @@ for fn in filenames: else: raise NotImplementedError("Kitchen data missing - pls check") append_result( - decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, - install_date=str(kit_install), expiry_date=str(kit_next_due) + decent_homes_meta, + criteria="C", + variable="kitchen_less_than_20_years_old", + sub_variable="kitchen_less_than_20_years_old", + result=kitchen_age_result, + install_date=str(kit_install), + expiry_date=str(kit_next_due) ) # 2) Kitchen adequate space/layout @@ -470,7 +501,13 @@ for fn in filenames: raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Kitchen data missing - pls check") - append_result(decent_homes_meta, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) + append_result( + decent_homes_meta, + criteria="C", + variable="kitchen_adequate_space_and_layout", + sub_variable="kitchen_adequate_space_and_layout", + result=kitchen_adequacy_result, + ) # 3) Bathroom age 
≤ 30 years bath = get_element(data["elements"], LABEL_BATHROOM) @@ -483,7 +520,13 @@ for fn in filenames: else: raise NotImplementedError("Bathroom data missing - pls check") append_result( - decent_homes_meta, "bathroom_less_than_30_years_old", bathroom_age_result, install_date=str(bth_install) + decent_homes_meta, + criteria="C", + variable="bathroom_less_than_30_years_old", + sub_variable="bathroom_less_than_30_years_old", + result=bathroom_age_result, + install_date=str(bth_install), + expiry_date=bth_next_due ) # 4) Bathroom/WC appropriately located @@ -496,7 +539,13 @@ for fn in filenames: else: raise NotImplementedError("Bathroom data missing - pls check") - append_result(decent_homes_meta, "bathroom_wc_appropriately_located", bathroom_location_result) + append_result( + decent_homes_meta, + criteria="C", + variable="bathroom_wc_appropriately_located", + sub_variable="bathroom_wc_appropriately_located", + result=bathroom_location_result + ) # 5) Adequate external noise insulation noise = get_element(data["elements"], LABEL_NOISE) @@ -508,7 +557,13 @@ for fn in filenames: raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Noise insulation data missing - pls check") - append_result(decent_homes_meta, "adequate_external_noise_insulation", noise_result) + append_result( + decent_homes_meta, + criteria="C", + variable="adequate_external_noise_insulation", + sub_variable="adequate_external_noise_insulation", + result=noise_result + ) # 6) Adequate common entrance areas (flats only) if is_flat: @@ -535,7 +590,13 @@ for fn in filenames: else: raise NotImplementedError("Heating element missing in dataset") - append_result(decent_homes_meta, "efficient_heating_system_type", heating_type_result) + append_result( + decent_homes_meta, + criteria="D", + variable="efficient_heating_system_type", + sub_variable="efficient_heating_system_type", + result=heating_type_result + ) # heating distribution heating_dist = 
get_element(data["elements"], "Heating Distribution System in Property") @@ -550,7 +611,13 @@ for fn in filenames: else: raise NotImplementedError("Heating distribution element missing in dataset") - append_result(decent_homes_meta, "efficient_heating_distribution", heating_dist_result) + append_result( + decent_homes_meta, + criteria="D", + variable="efficient_heating_distribution", + sub_variable="efficient_heating_distribution", + result=heating_dist_result + ) # insulation loft = get_element(data["elements"], "Size in mm of Loft Insulation Thickness in Property") @@ -570,7 +637,13 @@ for fn in filenames: raise NotImplementedError("Unknown loft insulation code - pls check") else: raise NotImplementedError("Loft insulation data missing - pls check") - append_result(decent_homes_meta, "loft_insulation_sufficient", loft_result) + append_result( + decent_homes_meta, + criteria="D", + variable="loft_insulation_sufficient", + sub_variable="loft_insulation_sufficient", + result=loft_result + ) # Wall insulation check if wall: @@ -581,7 +654,13 @@ for fn in filenames: raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Wall insulation data missing - pls check") - append_result(decent_homes_meta, "wall_insulation_sufficient", wall_result) + append_result( + decent_homes_meta, + criteria="D", + variable="wall_insulation_sufficient", + sub_variable="wall_insulation_sufficient", + result=wall_result + ) # ---------------- Criterion A overall ---------------- a_vars = set(HHSRS_MAPPING.keys()) @@ -596,6 +675,38 @@ for fn in filenames: # ---------------- Criterion B overall ---------------- + component_results = {} + + for component in B_COMPONENT_LABELS.keys(): + comp_rows = [r for r in decent_homes_meta if + r["criteria"] == "B" and r["variable"] == component and r["sub_variable"] is not None] + comp_sub_results = [r["result"] for r in comp_rows] + + if not comp_sub_results: # no rows at all + comp_result = "no_data" + elif any(r == 
"fail" for r in comp_sub_results): + comp_result = "fail" + elif all(r == "pass" for r in comp_sub_results if r != "no_data"): + comp_result = "pass" + elif all(r == "no_data" for r in comp_sub_results): + comp_result = "no_data" + else: + comp_result = "no_data" + + component_results[component] = comp_result + + key_fails = [c for c, r in component_results.items() if c in KEY_COMPONENTS and r == "fail"] + other_fails = [c for c, r in component_results.items() if c in OTHER_COMPONENTS and r == "fail"] + + if key_fails: + criterion_b_result = "fail" + elif len(other_fails) >= 2: + criterion_b_result = "fail" + elif all(r == "no_data" for r in component_results.values()): + criterion_b_result = "no_data" + else: + criterion_b_result = "pass" + # ---------------- Criterion C overall ---------------- criterion_c_vars = [ "kitchen_less_than_20_years_old", @@ -635,7 +746,7 @@ for fn in filenames: "uprn": property_info.get("UPRN"), # TODO: Need UPRN "creation_date": datetime.now().date().isoformat(), "criterion_a": criterion_a_result, - "criterion_b": None, # not yet implemented + "criterion_b": criterion_b_result, "criterion_c": criterion_c_result, "criterion_d": criterion_d_result, "decent_homes": ( From e410e8d9c862e1d46ec3270399af97a833f970dd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 22:36:47 +0100 Subject: [PATCH 007/202] minor tidy --- etl/customers/waltham_forest/decent_homes_pilot.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index 33836236..0c7ea98f 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -1,12 +1,8 @@ import json import os - import pandas as pd - from datetime import datetime -from docutils.nodes import table - def years_between(d1, d2): # precise year difference (accounts for months/days) @@ -35,15 +31,6 @@ def 
append_result(decent_homes_meta, criteria, variable, sub_variable, result, i folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest/Decent Homes Pilot" filenames = ["flat 1.json", "house 1.json"] -houses_waltham_forest_data = pd.read_excel( - os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), - sheet_name="Houses Asset Data" -) -flats_waltham_forest_data = pd.read_excel( - os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), - sheet_name="CHINGFORD ROAD 236-254 Asset Bl" -) - # Standardised variables which will form the enums in the db HHSRS_VARIABLES = [ "damp_and_mould_growth", From 754644a8574827f4cf318b5fc2e6faa495fda2fb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 24 Sep 2025 00:26:46 +0100 Subject: [PATCH 008/202] minor bug fix with solar --- backend/apis/GoogleSolarApi.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 532afec0..a8982061 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -854,18 +854,21 @@ class GoogleSolarApi: ): continue + solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials) + if unit["longitude"] is None or unit["latitude"] is None: # At this point, we've checked that solar PV is valid, and so we provide some defaults property_instance.set_solar_panel_configuration( solar_panel_configuration={ "insights_data": None, - "panel_performance": cls.default_panel_performance(property_instance=property_instance), + "panel_performance": solar_api_client.default_panel_performance( + property_instance=property_instance + ), "unit_share_of_energy": 1 }, ) continue - solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials) solar_api_client.get( longitude=unit["longitude"], latitude=unit["latitude"], From 7c5c7ceb0cdc989237d95be4f1dfa96ca7f4277f Mon Sep 17 00:00:00 2001 From: Khalim 
Conn-Kowlessar Date: Sun, 28 Sep 2025 15:34:05 +0100 Subject: [PATCH 009/202] added better logging on dupes and adding new types to material table --- backend/Funding.py | 5 +++++ backend/app/db/models/materials.py | 2 ++ backend/engine/engine.py | 5 ++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/Funding.py b/backend/Funding.py index 33c94e11..d590474c 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -578,6 +578,11 @@ class Funding: return pps.squeeze()["Cost Savings"] if measure_type == "flat_roof_insulation": + + # Not funding for properties starting at C or above + if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 + pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == "FRI"] if pps.shape[0] != 1: raise ValueError("Invalid FRI category") diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 617ea0ac..347b66d5 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -45,6 +45,8 @@ class MaterialType(enum.Enum): scaffolding = "scaffolding" high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters" sealing_fireplace = "sealing_fireplace" + roomstat_programmer_trvs = "roomstat_programmer_trvs" + time_temperature_zone_control = "time_temperature_zone_control" class DepthUnit(enum.Enum): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index cc17222f..f4bffb17 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -534,7 +534,10 @@ async def model_engine(body: PlanTriggerRequest): if input_uprns: # Check for dupes if len(input_uprns) != len(set(input_uprns)): - raise ValueError("Duplicate UPRNs in the input data") + # Find the duplicate UPRNs + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") # If we have patches or overrides, we 
should read them in here patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body) From 9f92e856d3bbb4f0f310126fb56febb98e28f587 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 1 Oct 2025 16:02:04 +0100 Subject: [PATCH 010/202] set up of AWS SES --- infrastructure/terraform/main.tf | 13 ++++ infrastructure/terraform/modules/ses/main.tf | 50 ++++++++++++++ .../terraform/modules/ses/outputs.tf | 66 +++++++++++++++++++ .../terraform/modules/ses/variables.tf | 9 +++ 4 files changed, 138 insertions(+) create mode 100644 infrastructure/terraform/modules/ses/main.tf create mode 100644 infrastructure/terraform/modules/ses/outputs.tf create mode 100644 infrastructure/terraform/modules/ses/variables.tf diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 10ef31c2..c2840d62 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -261,4 +261,17 @@ module "cloudfront_distribution" { bucket_arn = module.s3.bucket_arn bucket_domain_name = module.s3.bucket_domain_name stage = var.stage +} + +################################################ +# SES - Email sending +################################################ +module "ses" { + source = "./modules/ses" + domain_name = "domna.homes" + stage = var.stage +} + +output "ses_dns_records" { + value = module.ses.dns_records } \ No newline at end of file diff --git a/infrastructure/terraform/modules/ses/main.tf b/infrastructure/terraform/modules/ses/main.tf new file mode 100644 index 00000000..e8f183ae --- /dev/null +++ b/infrastructure/terraform/modules/ses/main.tf @@ -0,0 +1,50 @@ +resource "aws_ses_domain_identity" "this" { + domain = var.domain_name +} + +# DKIM signing +resource "aws_ses_domain_dkim" "this" { + domain = aws_ses_domain_identity.this.domain +} + +# IAM user for SES SMTP +resource "aws_iam_user" "ses_user" { + name = "${var.stage}-ses-user" +} + +resource "aws_iam_user_policy" 
"ses_send_policy" { + name = "AllowSESSendEmail" + user = aws_iam_user.ses_user.name + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "ses:SendEmail", + "ses:SendRawEmail" + ] + Resource = "*" + } + ] + }) +} + +resource "aws_iam_access_key" "ses_user" { + user = aws_iam_user.ses_user.name +} + +# Store SMTP credentials in AWS Secrets Manager +resource "aws_secretsmanager_secret" "ses_smtp" { + name = "${var.stage}/ses/smtp_credentials" + description = "SMTP credentials for SES (${var.stage})" +} + +resource "aws_secretsmanager_secret_version" "ses_smtp" { + secret_id = aws_secretsmanager_secret.ses_smtp.id + secret_string = jsonencode({ + username = aws_iam_access_key.ses_user.id + password = aws_iam_access_key.ses_user.ses_smtp_password_v4 + }) +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/ses/outputs.tf b/infrastructure/terraform/modules/ses/outputs.tf new file mode 100644 index 00000000..de708983 --- /dev/null +++ b/infrastructure/terraform/modules/ses/outputs.tf @@ -0,0 +1,66 @@ +# These are our DNS records that will need to be added to our Krystal account + +# TXT record +output "verification_record" { + description = "TXT record required to verify the domain with SES" + value = { + name = "_amazonses.${aws_ses_domain_identity.this.domain}" + type = "TXT" + value = aws_ses_domain_identity.this.verification_token + } +} + +# DKIM CNAME records +output "dkim_records" { + description = "CNAME records required to enable DKIM for SES" + value = [ + for dkim in aws_ses_domain_dkim.this.dkim_tokens : { + name = "${dkim}._domainkey.${aws_ses_domain_identity.this.domain}" + type = "CNAME" + value = "${dkim}.dkim.amazonses.com" + } + ] +} + +# SMTP credentials - send them to secrets manager +output "ses_smtp_secret_arn" { + description = "ARN of the SES SMTP credentials stored in Secrets Manager" + value = aws_secretsmanager_secret.ses_smtp.arn +} + +output "smtp_password" { + value 
= aws_iam_access_key.ses_user.ses_smtp_password_v4 + sensitive = true + description = "SMTP password for SES" +} + +output "dns_records" { + description = "All DNS records required for SES verification and recommended deliverability" + value = concat( + [ + { + name = "_amazonses.${aws_ses_domain_identity.this.domain}" + type = "TXT" + value = aws_ses_domain_identity.this.verification_token + }, + { + name = var.domain_name + type = "TXT" + value = "v=spf1 include:amazonses.com -all" + }, + { + name = "_dmarc.${var.domain_name}" + type = "TXT" + value = "v=DMARC1; p=quarantine; rua=mailto:postmaster@${var.domain_name}" + } + ], + [ + for dkim in aws_ses_domain_dkim.this.dkim_tokens : { + name = "${dkim}._domainkey.${aws_ses_domain_identity.this.domain}" + type = "CNAME" + value = "${dkim}.dkim.amazonses.com" + } + ] + ) +} + diff --git a/infrastructure/terraform/modules/ses/variables.tf b/infrastructure/terraform/modules/ses/variables.tf new file mode 100644 index 00000000..d8c97d6d --- /dev/null +++ b/infrastructure/terraform/modules/ses/variables.tf @@ -0,0 +1,9 @@ +variable "domain_name" { + description = "The domain to verify with SES (e.g. domna.homes)" + type = string +} + +variable "stage" { + description = "Deployment stage (e.g. 
dev, prod)" + type = string +} From 930a5d83985502dfd10aee1ec5e8b531d38c4c62 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 1 Oct 2025 16:07:49 +0100 Subject: [PATCH 011/202] use 14.17 verion for rds --- infrastructure/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index c2840d62..f345c985 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" { resource "aws_db_instance" "default" { allocated_storage = var.allocated_storage engine = "postgres" - engine_version = "14.13" + engine_version = "14.17" instance_class = var.instance_class db_name = var.database_name username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"] From aa03ef1b0f4ce0162a383304d79b14d9629e4954 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 10 Oct 2025 15:59:38 +0100 Subject: [PATCH 012/202] handle case of property already EPC C or above for funding --- recommendations/optimiser/funding_optimiser.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 03824ea0..73475fc0 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -231,8 +231,8 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # We now produce a fabric only path for ECO4 # We add in generic insulation funding paths (where there is no fixed measure) # Heating controls are only eligible if installed as part of a heating upgrade and so we do not include them - # here - if housing_type == "Social": + # here. 
We don't have an option if the property is a C or above + if housing_type == "Social" and p.data["current-energy-rating"] not in ["C", "B", "A"]: funding_paths = ( [ { @@ -301,7 +301,6 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # We log an error and skip this - we should not see any errors but we can probably get a reasonable # outcome for the end user without a complete termination of the process logger.error("Skipping fixed selection due to minimum insulation violation: %s", fixed) - blah continue scheme = _path_scheme(path_spec) @@ -829,6 +828,11 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): :param funding: The funding object that provides methods to check eligibility and calculate funding. :return: """ + + # If the property is currently EPC C, there is no funding availability + if p.data["current-energy-rating"] in ["C", "B", "A"]: + return [], input_measures + # We handle the case of minimum insulation requirements. Whenever we have a heating system recommendation, # we *must* include an additional insulation measure, unless the property already has sufficient insulation. 
From 570d4630971ab772c274baba125ceac9442b5385 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 10 Oct 2025 18:55:02 +0100 Subject: [PATCH 013/202] allow for no build form --- backend/engine/engine.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f4bffb17..fa1f191c 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -484,12 +484,19 @@ async def model_engine(body: PlanTriggerRequest): plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"]) # We handle the landlord property type and built form plan_input["property_type"] = plan_input["landlord_property_type"].copy() - plan_input["built_form"] = plan_input["landlord_built_form"].copy() + if "landlord_built_form" in plan_input.columns: + plan_input["built_form"] = plan_input["landlord_built_form"].copy() + else: + plan_input["built_form"] = None plan_input["property_type"] = np.where( plan_input["property_type"] == "unknown", plan_input["epc_property_type"], plan_input["property_type"] ) + + if "epc_archetype" not in plan_input.columns: + plan_input["epc_archetype"] = None + plan_input["built_form"] = np.where( plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"] ) From e5272e2e64b97528f9c1ec8631ccdba468142c0f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 13 Oct 2025 18:36:35 +0100 Subject: [PATCH 014/202] SAL for hyde --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 22 +++++++-- asset_list/app.py | 68 ++++++++++++++++++++++++++ asset_list/mappings/built_form.py | 20 +++++++- asset_list/mappings/heating_systems.py | 19 ++++++- asset_list/mappings/property_type.py | 20 +++++++- asset_list/mappings/roof.py | 27 +++++++++- asset_list/mappings/walls.py | 3 ++ 9 files changed, 174 insertions(+), 9 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 
100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index dce929ae..da20432b 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -309,6 +309,17 @@ class AssetList: 'NAME OF SURVEYOR' ] + # Solar non-intrusive fields + NON_INTRUSIVES_SOLAR_COLNAMES = [ + 'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION', + 'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING', + 'Roof Tiles - CONCRETE/SLATE/ROSEMARY', + 'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)', + 'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE', + 'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW', + 'DATE', 'NAME OF SURVEYOR' + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -461,6 +472,8 @@ class AssetList: self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -774,6 +787,9 @@ class AssetList: if self.new_format_non_insturives_present_v2: non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 + if self.solar_non_intrusives_present: + non_intrusive_columns += self.NON_INTRUSIVES_SOLAR_COLNAMES + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -946,7 +962,7 @@ class AssetList: if self.phase: # We filter on just the properties that have had an 
inspection - if self.new_format_non_insturives_present_v2: + if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: self.standardised_asset_list = self.standardised_asset_list[ ~self.standardised_asset_list['NAME OF SURVEYOR'].isin( ["YET TO BE SURVEYED", "", None] @@ -1341,10 +1357,10 @@ class AssetList: # for identifying cavity jobs if self.non_intrusives_present and not self.old_format_non_intrusives_present: - if self.new_format_non_insturives_present_v2: + if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: existing_solar_non_intrusives_check = ( self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin( - ["ALREADY HAS SOLAR PV"] + ["ALREADY HAS SOLAR PV", "ALREADY HAS PV"] ) ) else: diff --git a/asset_list/app.py b/asset_list/app.py index 833050fb..2903e083 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,74 @@ def app(): Property UPRN """ + # Hyde - solar + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" + data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + sheet_name = "Electric Property Inspections" + postcode_column = 'Postcode' + address1_column = None # Is only patchily populated so we create it + address1_method = 'house_number_extraction' + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Property Type" + landlord_wall_construction = "Walls " + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = None + landlord_property_id = "Address ID" + landlord_sap = "SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + 
master_to_asset_list_filepath = None + phase = True + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Hyde cavity + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" + data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + sheet_name = "Cavity Inspections" + postcode_column = 'Postcode' + address1_column = None # Is only patchily populated so we create it + address1_method = 'house_number_extraction' + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Property Type" + landlord_wall_construction = "Walls " + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = None + landlord_property_id = "Address ID" + landlord_sap = "SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = True + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # CDS - Sept 2025 data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" data_filename = "Founder Estates CDS.xlsx" diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index bdd82883..b02b8810 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -439,5 +439,23 @@ BUILT_FORM_MAPPINGS = { 'Chalet - Wheelchair': 'unknown', 'Studio Flat': 'unknown', 'Bungalow - Attached': 'semi-detached', - 'ND': 'unknown' + 'ND': 'unknown', + + 'Maisonette: Mid Terrace: Mid Floor': 'mid-floor', + 'Maisonette: Semi Detached: Ground Floor': 'semi-detached', + 'Maisonette: Enclosed Mid Terrace: Ground Floor': 
'enclosed mid-terrace', + 'Maisonette: Enclosed End Terrace: Ground Floor': 'end-terrace', + 'Maisonette: Mid Terrace: Ground Floor': 'mid-terrace', + 'Flat: Semi Detached: Basement': 'semi-detached', + 'Maisonette: Semi Detached: Top Floor': 'semi-detached', + 'Maisonette: Enclosed Mid Terrace: Mid Floor': 'enclosed mid-terrace', + 'Flat: Detached: Basement': 'detached', + 'Maisonette: Enclosed Mid Terrace: Top Floor': 'enclosed mid-terrace', + + 'Maisonette: End Terrace: Top Floor': 'top-floor', + 'House: Mid Terrace: Ground Floor': 'ground floor', + 'Maisonette: Semi Detached: Mid Floor': 'detached', + 'Maisonette: Detached: Mid Floor': 'detached', + 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 4ab8ca72..ffd1b198 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -477,6 +477,23 @@ HEATING_MAPPINGS = { 'Heat networks Heat networks (mains gas)': 'communal heating', 'ND Oil': 'oil fuel', - 'Boiler Biofuel': 'boiler - other fuel' + 'Boiler Biofuel': 'boiler - other fuel', + + 'Electric (direct acting) room heaters: Water- or oil-filled radiators': 'room heaters', + 'Other: Electric ceiling heating': 'electric ceiling', + 'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump', + 'Oil room heaters: Room heater, 2000 or later': 'room heaters', + 'Electric Underfloor Heating: In screed above insulation (standard or off peak)': 'electric underfloor', + 'Heat Pump: Electric Heat pumps: Air source heat pump in other cases': 'air source heat pump', + 'Electric Storage Systems: Old (large volume) storage heaters': 'electric storage heaters', + + 'Gas (including LPG) room heaters: Condensing gas fire': 'room heaters', + 'Solid fuel room heaters: Open fire in grate': 'solid fuel', + 'Solid fuel room heaters: Open fire with back boiler (no radiators)': 'solid fuel', 
+ 'Community Heating Systems: Community heat pump (RdSAP)': 'communal heating', + 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' + 'and sealed to, fireplace opening': 'room heaters', + 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', + 'Boiler: G rated Combi': 'gas condensing combi' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 290e172a..88ec2934 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -343,5 +343,23 @@ PROPERTY_MAPPING = { 'bungalow': 'bungalow', 'flat': 'flat', 'FLA': 'flat', - 'HOU': 'house' + 'HOU': 'house', + + 'Maisonette: Mid Terrace: Mid Floor': 'maisonette', + 'Maisonette: Semi Detached: Ground Floor': 'maisonette', + 'Maisonette: Enclosed Mid Terrace: Ground Floor': 'maisonette', + 'Maisonette: Enclosed End Terrace: Ground Floor': 'maisonette', + 'Maisonette: Mid Terrace: Ground Floor': 'maisonette', + 'Flat: Semi Detached: Basement': 'flat', + 'Maisonette: Semi Detached: Top Floor': 'maisonette', + 'Maisonette: Enclosed Mid Terrace: Mid Floor': 'maisonette', + 'Flat: Detached: Basement': 'flat', + 'Maisonette: Enclosed Mid Terrace: Top Floor': 'maisonette', + + 'Maisonette: End Terrace: Top Floor': 'maisonette', + 'House: Mid Terrace: Ground Floor': 'house', + 'Bungalow: EnclosedMidTerrace': 'bungalow', + 'Maisonette: Semi Detached: Mid Floor': 'maisonette', + 'Maisonette: Detached: Mid Floor': 'maisonette' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 8ac926c0..0857b046 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -275,5 +275,30 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation', 'ND (inferred) ND (inferred)': 'unknown', 'Flat Non-joist': 'flat insulated', - 'Same dwelling above N/A': 'another dwelling above' + 'Same 
dwelling above N/A': 'another dwelling above', + + 'Flat: As Built, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: Unknown, PitchedNormalNoLoftAccess: Unknown': 'pitched unknown insulation', + 'PitchedNormalLoftAccess: 400mm+': 'pitched insulated', + 'AnotherDwellingAbove: 150mm': 'another dwelling above', + 'Flat: 150mm': 'flat insulated', + 'AnotherDwellingAbove: 50mm': 'another dwelling above', + 'PitchedNormalNoLoftAccess: As Built': 'pitched no access to loft', + 'PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: 350mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 270mm': 'pitched no access to loft', + 'AnotherDwellingAbove: 100mm': 'another dwelling above', + + 'PitchedWithSlopingCeiling: Unknown': 'piched unknown insulation', + 'AnotherDwellingAbove: Unknown, Flat: As Built': 'another dwelling above', + 'Flat: Unknown, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation', + 'SameDwellingAbove: Unknown': 'another dwelling above', + 'Flat: Unknown': 'flat unknown insulation', + 'Flat: 50mm, PitchedNormalLoftAccess: 100mm': 'flat insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 400mm+': 'flat unknown insulation', + 'PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 73db586e..418ae9f8 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -353,4 +353,7 @@ WALL_CONSTRUCTION_MAPPINGS = { 'System built As-built': "uninsulated system built", 'System built Internal': 'insulated system built', + 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 
'cavity unknown insulation', + 'Cavity: FilledCavityPlusExternal': 'filled cavity' + } From 93723697a18aeed93ef9d784fae9fff477cf62e8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 27 Oct 2025 15:27:32 +0000 Subject: [PATCH 015/202] allow no valuation and work with relative --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/app.py | 110 ++++++++++++++++++++++++++- backend/SearchEpc.py | 5 +- backend/engine/engine.py | 46 +++++++++--- backend/ml_models/Valuation.py | 15 +++- etl/webscrape/Zoopla.py | 133 +++++++++++++++++++++++++-------- etl/webscrape/requirements.txt | 5 ++ 8 files changed, 265 insertions(+), 53 deletions(-) create mode 100644 etl/webscrape/requirements.txt diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/app.py b/asset_list/app.py index 2903e083..20cf04f1 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,9 +59,111 @@ def app(): Property UPRN """ + # + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" + data_filename = "22.10_Cambridge_west addresses.xlsx" + sheet_name = "Asset List" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Full Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id 
= None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Property Box + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box" + data_filename = "Property Box Finance Portfolio.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address 1" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = "block_id" + + # CDS - able-to-pay + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay" + data_filename = "CDS_ASSET LIST_(2314).xlsx" + sheet_name = "Sheet1" + postcode_column = 'Property Address - Postcode' + address1_column = "Property Address - Line 1" + address1_method = None + fulladdress_column = "Property Address - Line 1" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = 
None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # Hyde - solar data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" - data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" sheet_name = "Electric Property Inspections" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it @@ -88,14 +190,14 @@ def app(): master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None - phase = True + phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Hyde cavity data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" - data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" sheet_name = "Cavity Inspections" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it @@ -122,7 +224,7 @@ def app(): master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None - phase = True + phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 16dd8f04..1a14e87a 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -347,7 +347,8 @@ class SearchEpc: # We update the data with the correct uprn if self.uprn: for x in api_response["response"]["rows"]: - x["uprn"] = self.uprn + if pd.isnull(x["uprn"]): + x["uprn"] = self.uprn 
data["rows"].extend(api_response["response"]["rows"]) @@ -357,6 +358,8 @@ class SearchEpc: row for row in data["rows"] if row["lmk-key"] not in seen and not seen.add(row["lmk-key"]) ] + # Overwrite the data + self.data = data if data["rows"]: api_response["msg"] = self.SUCCESS diff --git a/backend/engine/engine.py b/backend/engine/engine.py index fa1f191c..f2674290 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -145,14 +145,17 @@ def extract_portfolio_aggregation_data( cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) - lower_bound_valuation_uplift = ( - property_value_increase_ranges[p.id]["lower_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] - ) - upper_bound_valuation_uplift = ( - property_value_increase_ranges[p.id]["upper_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] - ) + if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]): + lower_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["lower_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + upper_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["upper_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + else: + lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0 agg_data.append({ "pre_retrofit_epc": p.data["current-energy-rating"], @@ -523,6 +526,7 @@ async def model_engine(body: PlanTriggerRequest): plan_input["built_form"] = plan_input["built_form"].map(built_form_map) plan_input = plan_input.to_dict("records") + else: raise ValueError("Other formats not yet supported") @@ -549,6 +553,13 @@ async def model_engine(body: PlanTriggerRequest): # If we have patches or overrides, we should read them in here patches, already_installed, non_invasive_recommendations, valuation_data = 
get_request_property_data(body) + if body.file_type == "xlsx" and body.file_format == "domna_asset_list": + # We check if we have valution data + if not valuation_data and body.valuation_file_path in [None, ""]: + # We check plan_input + if "domna_valuation" in plan_input[0]: + valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input] + cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) @@ -563,12 +574,22 @@ async def model_engine(body: PlanTriggerRequest): if uprn: uprn = int(float(uprn)) + address1 = config.get("address", None) + # Handle domna address list format + if pd.isnull(address1) and body.file_format == "domna_asset_list": + address1 = config.get("domna_full_address", None) + + address1 = str(int(address1)) if isinstance(address1, float) else str(address1) + + full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None + epc_searcher = SearchEpc( - address1=str(config["address"]), + address1=address1, postcode=config["postcode"], uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", + full_address=full_address ) epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) @@ -1176,9 +1197,10 @@ async def model_engine(body: PlanTriggerRequest): upload_funding(session, p, new_plan_id, recommendations_to_upload) - property_valuation_increases.append( - valuations["average_increased_value"] - valuations["current_value"] - ) + if valuations["current_value"] > 0: + property_valuation_increases.append( + valuations["average_increased_value"] - valuations["current_value"] + ) # Commit the session after each batch session.commit() diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 8c57900f..17db0dae 100644 --- a/backend/ml_models/Valuation.py +++ 
b/backend/ml_models/Valuation.py @@ -219,12 +219,19 @@ class PropertyValuation: current_epc = property_instance.data["current-energy-rating"] if not current_value: + # In this case, we return a % improvement rather than an absolute + relative_improvement = cls.estimate_valuation_improvement( + current_value=1, + current_epc=current_epc, + target_epc=target_epc, + total_cost=1 + ) return { "current_value": 0, - "lower_bound_increased_value": 0, - "upper_bound_increased_value": 0, - "average_increased_value": 0, - "average_increase": 0 + "lower_bound_increased_value": relative_improvement["lower_bound_increased_value"] - 1, + "upper_bound_increased_value": relative_improvement["upper_bound_increased_value"] - 1, + "average_increased_value": relative_improvement["average_increased_value"] - 1, + "average_increase": relative_improvement["average_increase"] } return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost) diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py index bb86c759..7b3fd5b6 100644 --- a/etl/webscrape/Zoopla.py +++ b/etl/webscrape/Zoopla.py @@ -1,38 +1,111 @@ -# Initial Code - -from seleniumbase import SB +from bs4 import BeautifulSoup +import pandas as pd import time +from stealth_requests import StealthSession +import random +from multiprocessing import Pool +from tqdm import tqdm -uprns = [ - 100071297618, - 100080893397, - 100060778033, - 200004793081, - 100071265143, - 100071297618, - 100080893397, - 100060778033, - 200004793081, - 100071265143, -] +ENGINES = ["safari", "chrome"] -estimate_list = [] -for uprn in uprns: +def scrape_all_estimates(session, url): + # Rotate impersonation per request + resp = session.get(url, impersonate=ENGINES[random.randint(0, 1)]) + page_source = BeautifulSoup(resp.text, "html.parser") + estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) + is_blocked = len(estimates) == 0 + return estimates, is_blocked - # Probably can change the timings here - 
time.sleep(5) - with SB(uc=True) as sb: - sb.uc_open_with_reconnect( - f"https://www.zoopla.co.uk/property/uprn/{uprn}/", - 3, + +def parallel_task(url): + # No impersonate argument here + with StealthSession() as session: + estimates, is_blocked = scrape_all_estimates(session, url) + + while is_blocked: + print(f"Blocked by Zoopla for URL: {url}") + time.sleep(random.uniform(0, 1)) + estimates, is_blocked = scrape_all_estimates(session, url) + + low_estimate = estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text + middle_estimate = estimates[0].find("p", {"data-testid": "estimate-blurred"}).text + high_estimate = estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text + + return { + "URL": url, + "Low Estimate": low_estimate, + "Middle Estimate": middle_estimate, + "High Estimate": high_estimate, + } + + +def parse_price(p): + p = p.replace("£", "").strip().lower() + if p.endswith("k"): + return float(p[:-1]) * 1000 + elif p.endswith("m"): + return float(p[:-1]) * 1_000_000 + else: + return float(p) + + +# def parallel_task(url): +# with StealthSession(impersonate=ENGINES[random.randint(0, 1)]) as session: +# estimates, is_blocked = scrape_all_estimates(session, url) +# +# while is_blocked: +# # Will need to wait and retry if blocked by Zoopla +# print(f"Blocked by Zoopla for URL: {url}") +# sleep_factor = random.uniform(0, 1) # Random delay to avoid detection +# time.sleep(sleep_factor * 1) +# estimates, is_blocked = scrape_all_estimates(session, url) +# +# low_estimate = ( +# estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text +# ) # Find all span elements with data-testid="low-estimate" +# middle_estimate = ( +# estimates[0].find("p", {"data-testid": "estimate-blurred"}).text +# ) # Find all span elements with data-testid="middle-estimate" +# high_estimate = ( +# estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text +# ) # Find all span elements with data-testid="high-estimate-blurred" 
+# +# return { +# "URL": url, +# "Low Estimate": low_estimate, +# "Middle Estimate": middle_estimate, +# "High Estimate": high_estimate, +# } + + +if __name__ == "__main__": + # Get a SAL + asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box/Property Box Finance Portfolio - " + "Standardised.xlsx", + sheet_name="Standardised Asset List" + ) + asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str) + uprns = asset_list["epc_os_uprn"].tolist() + urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns] + + with Pool(processes=5) as pool: + estimates_list = list( + tqdm( + pool.imap(parallel_task, urls), + total=len(urls), + ) ) - soup = sb.get_beautiful_soup() + df = pd.DataFrame(estimates_list) + # Extract UPRN from URL + df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/") + df["valuation"] = df["Middle Estimate"].apply(parse_price) + df.to_csv("zoopla_estimates.csv", index=False) - estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) - # Can change the way we extract the text here - estimate_text = ( - estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"] - ) - estimate_list.append(estimate_text) + df["uprn"] = df["uprn"].astype(int).astype(str) + + asset_list.merge(df[["uprn", "valuation"]], left_on="epc_os_uprn", right_on="uprn", how="left").to_excel( + "Property Box Finance Portfolio - Standardised - with valuations.xlsx", index=False + ) diff --git a/etl/webscrape/requirements.txt b/etl/webscrape/requirements.txt new file mode 100644 index 00000000..4027a224 --- /dev/null +++ b/etl/webscrape/requirements.txt @@ -0,0 +1,5 @@ +beautifulsoup4>=4.12.0 +pandas>=2.0.0 +stealth-requests>=1.0.7 +tqdm>=4.65.0 +openpyxl \ No newline at end of file From 504a714fc65ffe17f5f2c373ffeb3f0b2065956f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 13:43:10 +0000 Subject: [PATCH 016/202] fixed ranking algo for unfunded --- 
backend/engine/engine.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f2674290..f4152852 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -959,15 +959,19 @@ async def model_engine(body: PlanTriggerRequest): ) # Given the solutions we select the optimal one + # 1) If the scheme is ECO4, the full project funding and uplift are deducted from the cost + # 2) If the sheme is GBIS, the partial project funding and uplift are deducted from the cost + # 3) Otherwise, no funding is deducted from the cost solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] + solutions["scheme"] == "none", + solutions["total_cost"], + np.where( + solutions["scheme"] == "eco4", + solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], + solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] + ) ) - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) if solutions["meets_upgrade_target"].any(): From b8fc16dac569a9282df0095168ae0868bb4cadf0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 14:41:28 +0000 Subject: [PATCH 017/202] added new measure --- backend/app/db/models/materials.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 347b66d5..9b38addd 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -19,6 +19,7 @@ class MaterialType(enum.Enum): flat_roof_insulation = 
"flat_roof_insulation" room_roof_insulation = "room_roof_insulation" windows_glazing = "windows_glazing" + secondary_glazing = "secondary_glazing" cavity_wall_extraction = "cavity_wall_extraction" iwi_wall_demolition = "iwi_wall_demolition" From f20b22187086ca4be5578b8123ae4af5de4b9e7e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 17:24:51 +0000 Subject: [PATCH 018/202] fixed fetching of incorrect find my epc data --- asset_list/app.py | 34 ++++++++++++++++++++++++++++ asset_list/mappings/built_form.py | 4 +++- asset_list/mappings/property_type.py | 4 +++- etl/find_my_epc/RetrieveFindMyEpc.py | 29 ++++++++++++++++-------- 4 files changed, 60 insertions(+), 11 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index 20cf04f1..bb5cb427 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,40 @@ def app(): Property UPRN """ + # Stonewater Solar + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" + data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" + sheet_name = "Proposed Sheet" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Property Type" + landlord_wall_construction = "Walls" + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = None + landlord_property_id = "Asset Id" + landlord_sap = "SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + 
asset_list_header = 0 + landlord_block_reference = None + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" data_filename = "22.10_Cambridge_west addresses.xlsx" diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index b02b8810..2556d755 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -456,6 +456,8 @@ BUILT_FORM_MAPPINGS = { 'House: Mid Terrace: Ground Floor': 'ground floor', 'Maisonette: Semi Detached: Mid Floor': 'detached', 'Maisonette: Detached: Mid Floor': 'detached', - 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace' + 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace', + + 'House: EnclosedMidTerrace': 'enclosed mid-terrace' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 88ec2934..1c236d96 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -360,6 +360,8 @@ PROPERTY_MAPPING = { 'House: Mid Terrace: Ground Floor': 'house', 'Bungalow: EnclosedMidTerrace': 'bungalow', 'Maisonette: Semi Detached: Mid Floor': 'maisonette', - 'Maisonette: Detached: Mid Floor': 'maisonette' + 'Maisonette: Detached: Mid Floor': 'maisonette', + + 'House: EnclosedMidTerrace': 'house' } diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 21794284..5bb5e39b 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -718,15 +718,26 @@ class RetrieveFindMyEpc: find_epc_data = searcher.retrieve_newest_find_my_epc_data() except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") - if epc["address1"] == epc["address"]: - # There's no benefit of using the same address, so we split on comma - address1 = epc["address"].split(",")[0] - else: - address1 = epc["address1"] - # We attempt with the backup add - searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = 
searcher.retrieve_newest_find_my_epc_data() - logger.info("Successfully retrieved find my epc data using backup address") + + # We try two backup approaches. The first is to trim the final section off the end of the address + address1 = ",".join(epc["address"].split(",")[:-1]) + try: + searcher = cls(address=address1, postcode=epc["postcode"]) + find_epc_data = searcher.retrieve_newest_find_my_epc_data() + logger.info("Successfully retrieved find my epc data using trimmed address") + except Exception as e2: + logger.error(f"Error retrieving find my epc data using trimmed address: {e2}") + # Attempt final approach + + if epc["address1"] == epc["address"]: + # There's no benefit of using the same address, so we split on comma + address1 = epc["address"].split(",")[0] + else: + address1 = epc["address1"] + # We attempt with the backup add + searcher = cls(address=address1, postcode=epc["postcode"]) + find_epc_data = searcher.retrieve_newest_find_my_epc_data() + logger.info("Successfully retrieved find my epc data using backup address") non_invasive_recommendations = { "uprn": epc["uprn"], From 1c94db54ef5e0020f494943876157528695869a3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 17:56:43 +0000 Subject: [PATCH 019/202] corrected eligibiltiy criteria for EPC E, F, G EWI projects --- recommendations/optimiser/funding_optimiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 73475fc0..8fbb13b2 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -896,7 +896,7 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 1) The package must include EWI or IWI if the property is private rental sector # We check if we have any EWI or IWI measures available - only for EPC E or below - if 
p.data["current-energy-rating"] not in ["E", "F", "G"]: + if p.data["current-energy-rating"] in ["E", "F", "G"]: ewi_or_iwi = [{"OR": []}] reference_measures = [] # If we have EWI we add it in From ef934f6b7c13918e014182ee043514a18604c019 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 19:21:53 +0000 Subject: [PATCH 020/202] debugged funding test --- backend/SearchEpc.py | 15 +- .../test_data/innovation_measure_fixtures.py | 40 +- backend/tests/test_funding.py | 102 +- backend/tests/test_integration.py | 1061 +++++++++-------- backend/tests/test_search_epc.py | 3 +- recommendations/tests/test_optimisers.py | 142 ++- 6 files changed, 756 insertions(+), 607 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 1a14e87a..60999e94 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -418,7 +418,20 @@ class SearchEpc: address, [", ".join([r["address"]]) for r in rows], score_cutoff=0 ) # Pick the largest score - if best_match1[1] >= best_match2[1]: + if best_match1[1] == best_match2[1]: + # if thery're the same, we'll work under the assumption that the addresses are the same and we'll + # take whichever has the newest EPC + rows_filtered = [ + r for r in rows + if (", ".join([r["address"], r["posttown"]]) == best_match1[0]) or + (r["address"] == best_match2[0]) + ] + rows_filtered = [ + r for r in rows_filtered + if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered]) + ] + + elif best_match1[1] > best_match2[1]: # Get all of the scores rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]] else: diff --git a/backend/tests/test_data/innovation_measure_fixtures.py b/backend/tests/test_data/innovation_measure_fixtures.py index 886421c4..a66cc7ec 100644 --- a/backend/tests/test_data/innovation_measure_fixtures.py +++ b/backend/tests/test_data/innovation_measure_fixtures.py @@ -4,7 +4,7 @@ innovation_scenarios = [ # 1) Innovation PV, non-eligible 
heating system in place, EPC D - not eligible { "description": "Innovation PV, non-eligible heating system in place, EPC D", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Electric storage heaters", "heating_control_description": "Manual charge control", @@ -16,7 +16,7 @@ innovation_scenarios = [ # 2) Innovation PV, eligible heating system in place, EPC D - eligible { "description": "Innovation PV, eligible heating system in place, EPC D", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -29,8 +29,8 @@ innovation_scenarios = [ { "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -44,8 +44,8 @@ innovation_scenarios = [ { "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -58,7 +58,7 @@ innovation_scenarios = [ # 5) 
Innovation PV, needs wall insulation, no wall insulation measure - not eligible { "description": "Innovation PV, wall insulation recommended, but not installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -71,8 +71,8 @@ innovation_scenarios = [ { "description": "Innovation PV, wall insulation recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -85,7 +85,7 @@ innovation_scenarios = [ # 7) Innovation PV, needs roof insulation, no roof insulation measure - not eligible { "description": "Innovation PV, roof insulation recommended, not installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -98,8 +98,8 @@ innovation_scenarios = [ { "description": "Innovation PV, roof insulation recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat 
pump, radiators", @@ -112,7 +112,7 @@ innovation_scenarios = [ # 9) Innovation PV, needs both roof + wall insulation, no insulation - not eligible { "description": "Innovation PV, both insulations recommended, none installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -125,8 +125,8 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended, only wall done", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -140,8 +140,8 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended, only roof done", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -155,9 +155,9 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + 
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py index 59d65a28..d84480ce 100644 --- a/backend/tests/test_funding.py +++ b/backend/tests/test_funding.py @@ -120,7 +120,7 @@ def test_eco4_prs_eligible_with_swi( # 3) is getting a solid was measure # so it's eligible for ECO4 - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=50, # EPC E @@ -162,7 +162,7 @@ def test_eco4_prs_not_eligible_high_epc( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=72, # EPC C (too high) @@ -203,7 +203,7 @@ def test_gbis_prs_general_eligibility( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=65, # EPC D @@ -244,7 +244,7 @@ def test_gbis_prs_low_income_caveat( tenure="Private", ) - measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -290,7 +290,7 @@ def test_eco4_sh_epc_e_eligible( tenure="Social", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = 
[{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=50, # EPC E @@ -330,7 +330,7 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -365,7 +365,7 @@ def test_eco4_sh_epc_d_requires_innovation( gbis_private_solid_abs_rate=28, tenure="Social", ) - measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "uplift": 0.25}] + measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "innovation_uplift": 0.25}] funding2.check_funding( measures=measures2, starting_sap=60, # EPC D @@ -403,7 +403,7 @@ def test_eco4_sh_epc_d_requires_innovation( gbis_private_solid_abs_rate=28, tenure="Social", ) - measures3 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures3 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding3.check_funding( measures=measures3, starting_sap=60, # EPC D @@ -439,7 +439,7 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) - measures4 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, ] + measures4 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ] funding4.check_funding( measures=measures4, starting_sap=60, # EPC D @@ -476,8 +476,8 @@ def test_eco4_sh_epc_d_requires_innovation( ) measures5 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} ] funding5.check_funding( measures=measures5, @@ -516,7 
+516,7 @@ def test_eco4_sh_epc_d_requires_innovation( ) measures6 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ] funding6.check_funding( measures=measures6, @@ -556,9 +556,9 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) measures7 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ] funding7.check_funding( measures=measures7, @@ -599,7 +599,7 @@ def test_eco4_sh_solar_pv_requires_heating( tenure="Social", ) - measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -641,8 +641,8 @@ def test_eco4_sh_solar_pv_with_heating_is_ok( ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( measures=measures, @@ -684,7 +684,7 @@ def test_eco4_upgrade_requirement_e_to_c_pass( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # E (SAP 50) → C (SAP 70) meets upgrade rule funding.check_funding( @@ -727,7 +727,7 @@ def test_eco4_upgrade_requirement_e_to_d_fail( 
tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # E (SAP 50) → D (SAP 65) does NOT meet ECO4 upgrade rule funding.check_funding( @@ -770,7 +770,7 @@ def test_eco4_upgrade_requirement_f_to_d_pass( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # F (SAP 35) → D (SAP 60) is OK for ECO4 funding.check_funding( @@ -813,7 +813,7 @@ def test_eco4_upgrade_requirement_f_to_e_fail( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # F (SAP 35) → E (SAP 50) does NOT meet ECO4 rule funding.check_funding( @@ -859,7 +859,7 @@ def test_epc_d_social_no_innovation_no_heating( ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45} ] funding.check_funding( @@ -905,10 +905,10 @@ def test_epc_d_social_with_heating_and_insulation( # Should NOT be eligible as the ASHP is not an innovation measure measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] 
funding.check_funding( @@ -954,9 +954,9 @@ def test_epc_d_social_solar_with_only_minimum_insulation_should_fail( # Solar PV innovation with insulation, but no heating system upgrade => not eligible measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1002,8 +1002,8 @@ def test_epc_d_social_solar_with_ashp_and_no_insulation_should_fail( # Solar PV innovation with heating, but no insulation when insulation is recommended => not eligible measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1050,10 +1050,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass( # Innovation solar + insulation measures + eligible heating upgrade = not valid because the heat pump isn;t # an innovation measure measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": 
"air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1095,10 +1095,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass( # Innovation solar + insulation measures + eligible heating upgrade = should be valid because the # heat pump is an innovation measure measures2 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": True, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": True, "innovation_uplift": 0.25} ] funding2.check_funding( @@ -1203,11 +1203,11 @@ def test_uplift( # # TODO: Add a scenario with multiple measures, where some are innovation, some are not and we have # TODO: Make sure private works too measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, ] funding.check_funding( @@ -1229,7 +1229,7 @@ def 
test_uplift( ) assert funding.eco4_funding == 5302.3949999999995 - assert funding.full_project_abs == 392.77 # is 280 + the 112.77 innovation uplift + assert funding.full_project_abs == 280 # Doesn't include the eco4 uplift assert funding.eco4_uplift == 112.77 @@ -1311,7 +1311,7 @@ def test_private_epc_e_solar_needs_heating( tenure="Private", ) - measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding.check_funding( measures=measures, starting_sap=54, # EPC E - eligible for private on EPC @@ -1360,10 +1360,10 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, ] funding.check_funding( diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index e6bcfce8..60778132 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -1,531 +1,532 @@ -import ast -import json -from copy import deepcopy -from dataclasses import replace -from datetime import datetime - -import random -from tqdm import tqdm -import pandas as pd -import numpy as np -from etl.epc.Record import EPCRecord -from backend.SearchEpc import SearchEpc -from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.orm import sessionmaker -from starlette.responses import Response - -from 
backend.app.config import get_settings, get_prediction_buckets -from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -from backend.app.db.functions.property_functions import ( - create_property, create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details -) -from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.models.portfolio import rating_lookup -from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -from backend.app.plan.utils import get_cleaned -from backend.app.utils import sap_to_epc -import backend.app.assumptions as assumptions - -from backend.ml_models.api import ModelApi -from backend.Property import Property -from backend.apis.GoogleSolarApi import GoogleSolarApi - -from recommendations.optimiser.CostOptimiser import CostOptimiser -from recommendations.optimiser.GainOptimiser import GainOptimiser -import recommendations.optimiser.optimiser_functions as optimiser_functions -from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 -from backend.ml_models.Valuation import PropertyValuation - -from etl.bill_savings.KwhData import KwhData -from etl.spatial.OpenUprnClient import OpenUprnClient -from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc - -from backend.Funding import Funding -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths -from 
recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value - -# Input data (temp) -import pickle - -import pandas as pd - -with open("local_data_for_deletion.pkl", 'rb') as f: - local_data = pickle.load(f) - -cleaning_data = local_data["cleaning_data"] -materials = local_data["materials"] -cleaned = local_data["cleaned"] -project_scores_matrix = local_data["project_scores_matrix"] -partial_project_scores_matrix = local_data["partial_project_scores_matrix"] -whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] - -with open("kwh_client_for_deletion.pkl", "rb") as f: - kwh_client = pickle.load(f) - -epc_data = pd.read_csv( - "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv", - low_memory=False -) - -# TODO: Store this for cleaning -costs_by_floor_area = epc_data[ - pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01" - ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", - "HOT_WATER_COST_CURRENT"]].copy() - -costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] -for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: - costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] - -costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ - ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] -].mean().reset_index() - -sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 1000).reset_index(drop=True) - -# TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type -# TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used -# in the google solar api but is it really needed? 
I don't think it's super accurate. It might be better to -# just use an average energy consumption by floor area for UK households? -# Load the input properties -input_properties = [] -for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): - epc = { - k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() - } - # Avoid the data load inside of EPCRecord - something we should pull out - for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: - if pd.isnull(epc[x]): - if x == "floor-height": - epc[x] = 2.4 - if x == "number-habitable-rooms": - epc[x] = 3 - if x == "number-heated-rooms": - epc[x] = 3 - - epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} - - prepared_epc = EPCRecord( - epc_records=epc_records, - run_mode="newdata", - cleaning_data=cleaning_data, - ) - - input_properties.append( - Property( - id=row_id, - is_new=True, - address=epc["address"], - postcode=epc["postcode"], - epc_record=prepared_epc, - already_installed={}, - property_valuation={}, - non_invasive_recommendations=[], - energy_assessment=None, - **Property.extract_kwargs(config), # TODO: Depraecate this - ) - ) - -# For each property, insert the default solar configuration -for p in tqdm(input_properties): - solar_api = GoogleSolarApi( - api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 - ) - panel_performance = solar_api.default_panel_performance(property_instance=p) - p.set_solar_panel_configuration( - solar_panel_configuration={ - "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 - }, - ) - -# We mock kwh preds -mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} -for p in tqdm(input_properties): - mocked_kwh_predictions["heating_kwh_predictions"].append({ - "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] - }) - 
mocked_kwh_predictions["hotwater_kwh_predictions"].append({ - "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] - }) -mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) -mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) - -# TODO: We might want to implement this generally, via an ETL process -for p in input_properties: - for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: - if pd.isnull(p.data[col]): - min_diff = abs( - (costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"]) - ).min() - df = costs_by_floor_area[ - abs((costs_by_floor_area["current-energy-efficiency"] - p.data[ - "current-energy-efficiency"])) == min_diff - ] - if df.shape[0] > 1: - df = df.head(1) - p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0] - -[ - p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in - input_properties -] +# import ast +# import json +# from copy import deepcopy +# from dataclasses import replace +# from datetime import datetime +# +# import random +# from tqdm import tqdm +# import pandas as pd +# import numpy as np +# from etl.epc.Record import EPCRecord +# from backend.SearchEpc import SearchEpc +# from sqlalchemy.exc import IntegrityError, OperationalError +# from sqlalchemy.orm import sessionmaker +# from starlette.responses import Response +# +# from backend.app.config import get_settings, get_prediction_buckets +# from backend.app.db.connection import db_engine +# from backend.app.db.functions.materials_functions import get_materials +# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations +# from backend.app.db.functions.property_functions import ( +# create_property, create_property_details_epc, create_property_targets, 
update_property_data, +# update_or_create_property_spatial_details +# ) +# from backend.app.db.functions.recommendations_functions import ( +# create_plan, upload_recommendations, create_scenario +# ) +# from backend.app.db.functions.funding_functions import upload_funding +# from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +# from backend.app.db.models.portfolio import rating_lookup +# from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES +# from backend.app.plan.utils import get_cleaned +# from backend.app.utils import sap_to_epc +# import backend.app.assumptions as assumptions +# +# from backend.ml_models.api import ModelApi +# from backend.Property import Property +# from backend.apis.GoogleSolarApi import GoogleSolarApi +# +# from recommendations.optimiser.CostOptimiser import CostOptimiser +# from recommendations.optimiser.GainOptimiser import GainOptimiser +# import recommendations.optimiser.optimiser_functions as optimiser_functions +# from recommendations.Recommendations import Recommendations +# from utils.logger import setup_logger +# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 +# from backend.ml_models.Valuation import PropertyValuation +# +# from etl.bill_savings.KwhData import KwhData +# from etl.spatial.OpenUprnClient import OpenUprnClient +# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +# +# from backend.Funding import Funding +# from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths +# from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value +# +# # Input data (temp) +# import pickle +# +# import pandas as pd +# +# with open("local_data_for_deletion.pkl", 'rb') as f: +# local_data = pickle.load(f) +# +# cleaning_data = local_data["cleaning_data"] +# materials = local_data["materials"] +# cleaned = local_data["cleaned"] 
+# project_scores_matrix = local_data["project_scores_matrix"] +# partial_project_scores_matrix = local_data["partial_project_scores_matrix"] +# whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] +# +# with open("kwh_client_for_deletion.pkl", "rb") as f: +# kwh_client = pickle.load(f) +# +# epc_data = pd.read_csv( +# "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates +# .csv", +# low_memory=False +# ) +# +# # TODO: Store this for cleaning +# costs_by_floor_area = epc_data[ +# pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01" +# ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", +# "HOT_WATER_COST_CURRENT"]].copy() +# +# costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] +# for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: +# costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] +# +# costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ +# ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] +# ].mean().reset_index() +# +# sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( +# 1000).reset_index(drop=True) +# +# # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type +# # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used +# # in the google solar api but is it really needed? I don't think it's super accurate. It might be better to +# # just use an average energy consumption by floor area for UK households? 
+# # Load the input properties +# input_properties = [] +# for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): +# epc = { +# k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() +# } +# # Avoid the data load inside of EPCRecord - something we should pull out +# for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: +# if pd.isnull(epc[x]): +# if x == "floor-height": +# epc[x] = 2.4 +# if x == "number-habitable-rooms": +# epc[x] = 3 +# if x == "number-heated-rooms": +# epc[x] = 3 +# +# epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} +# +# prepared_epc = EPCRecord( +# epc_records=epc_records, +# run_mode="newdata", +# cleaning_data=cleaning_data, +# ) +# +# input_properties.append( +# Property( +# id=row_id, +# is_new=True, +# address=epc["address"], +# postcode=epc["postcode"], +# epc_record=prepared_epc, +# already_installed={}, +# property_valuation={}, +# non_invasive_recommendations=[], +# energy_assessment=None, +# **Property.extract_kwargs(config), # TODO: Depraecate this +# ) +# ) +# +# # For each property, insert the default solar configuration +# for p in tqdm(input_properties): +# solar_api = GoogleSolarApi( +# api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 +# ) +# panel_performance = solar_api.default_panel_performance(property_instance=p) +# p.set_solar_panel_configuration( +# solar_panel_configuration={ +# "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 +# }, +# ) +# +# # We mock kwh preds +# mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} +# for p in tqdm(input_properties): +# mocked_kwh_predictions["heating_kwh_predictions"].append({ +# "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] +# }) +# mocked_kwh_predictions["hotwater_kwh_predictions"].append({ +# "id": p.uprn, "predictions": 
random.sample(range(100, 3000), 1)[0] +# }) +# mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) +# mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) +# +# # TODO: We might want to implement this generally, via an ETL process # for p in input_properties: -# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) - -# Run the recommendations -recommendations = {} -recommendations_scoring_data = [] -representative_recommendations = {} -for p in tqdm(input_properties): - if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]): - p.data["built-form"] = "Semi-Detached" - recommender = Recommendations( - property_instance=p, - materials=materials, - exclusions=[], - inclusions=[], - default_u_values=True - ) - property_recommendations, property_representative_recommendations = recommender.recommend() - - if not property_recommendations: - continue - - recommendations[p.id] = property_recommendations - representative_recommendations[p.id] = property_representative_recommendations - - p.create_base_difference_epc_record(cleaned_lookup=cleaned) - p.adjust_difference_record_with_recommendations( - property_recommendations, property_representative_recommendations - ) - - recommendations_scoring_data.extend(p.recommendations_scoring_data) - -recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) -recommendations_scoring_data = recommendations_scoring_data.drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] -) - -model_predictions_mocked = { - "sap_change_predictions": None, - "heat_demand_predictions": None, - "carbon_change_predictions": None, - "heating_kwh_predictions": None, - "hotwater_kwh_predictions": None, -} - -for k in model_predictions_mocked.keys(): - model_predictions_mocked[k] 
= recommendations_scoring_data[["id"]].copy() - model_predictions_mocked[k][['property_id', 'recommendation_id']] = ( - model_predictions_mocked[k]['id'].str.split('+', expand=True) - ) - model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply( - ModelApi.extract_phase) - - if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]: - model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000), - k=len(recommendations_scoring_data)) - continue - - model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True) - preds = [] - for p_id in model_predictions_mocked[k]["property_id"].unique(): - # We add some amount each time - p = [p for p in input_properties if str(p.id) == p_id][0] - if k == "sap_change_predictions": - start = p.data["current-energy-efficiency"] - elif k == "heat_demand_predictions": - start = p.data["energy-consumption-current"] - else: - start = p.data["co2-emissions-current"] - df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy() - # Add some amount each time - to_add = random.choices(range(0, 15), k=len(df)) - to_add = np.cumsum(to_add) - df["predictions"] = start + to_add - preds.append(df) - preds = pd.concat(preds) - model_predictions_mocked[k] = preds - -for property_id in tqdm(recommendations.keys(), total=len(recommendations)): - property_instance = [p for p in input_properties if p.id == property_id][0] - - recommendations_with_impact, impact_summary = ( - Recommendations.calculate_recommendation_impact( - property_instance=property_instance, - all_predictions=model_predictions_mocked, - recommendations=recommendations, - representative_recommendations=representative_recommendations - ) - ) - - # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc - # at each phase - property_instance.update_simulation_epcs(impact_summary) - 
recommendations[property_id] = recommendations_with_impact - -for property_id in tqdm([p.id for p in input_properties]): - property_recommendations = recommendations.get(property_id, []) - property_instance = [p for p in input_properties if p.id == property_id][0] - - property_current_energy_bill = ( - Recommendations.calculate_recommendation_tenant_savings( - property_instance=property_instance, - kwh_simulation_predictions=model_predictions_mocked, - property_recommendations=property_recommendations, - ashp_cop=2.8 - ) - ) - property_instance.current_energy_bill = property_current_energy_bill - -body = PlanTriggerRequest( - **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0, - 'trigger_file_path': '', 'already_installed_file_path': '', - 'patches_file_path': None, 'non_invasive_recommendations_file_path': None, - 'valuation_file_path': '', - 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None, - 'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8, - 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None, - 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} -) - -for p in tqdm(input_properties): - if not recommendations.get(p.id): - continue - - # we need to double unlist because we have a list of lists - property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} - property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] - measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] - - # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore - # its inclusion - needs_ventilation = any( - x in property_measure_types for x in assumptions.measures_needing_ventilation - ) and not p.has_ventilation - - if not 
measures_to_optimise: - # Nothing to do, we just reshape the recommendations - recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - p.id, recommendations, set() - ) - continue - - fixed_gain = optimiser_functions.calculate_fixed_gain( - property_required_measures, recommendations, p, needs_ventilation - ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) - - funding = Funding( - tenure="Social", - project_scores_matrix=project_scores_matrix, - partial_project_scores_matrix=partial_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=12.5, - eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=12.5, - eco4_private_solid_abs_rate=17, - gbis_social_cavity_abs_rate=21, - gbis_social_solid_abs_rate=25, - gbis_private_cavity_abs_rate=21, - gbis_private_solid_abs_rate=28, - ) - - li_thickness = convert_thickness_to_numeric( - p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] - ) - current_wall_u_value = p.walls["thermal_transmittance"] - if current_wall_u_value is None: - current_wall_u_value = get_wall_u_value( - clean_description=p.walls["clean_description"], - age_band=p.age_band, - is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], - is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], - ) - - # We insert the innovation uplift - measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) - - # TODO: Turn this into a function and store the innovaiton uplift - for group in measures_to_optimise_with_uplift: - for r in group: - - if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", - "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: - ( - r["partial_project_score"], - r["partial_project_funding"], - r["innovation_uplift"], - r["uplift_project_score"], - ) = ( - 0, 0, 0, 0 - ) - continue - - ( - r["partial_project_score"], 
r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"] - ) = funding.get_innovation_uplift( - measure=r, - starting_sap=p.data["current-energy-efficiency"], - floor_area=p.floor_area, - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - ) - - input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True - ) - - # When the goal is Increasing EPC, we can run the funding optimiser - if body.goal == "Increasing EPC": - - solutions = optimise_with_funding_paths( - p=p, - input_measures=input_measures, - housing_type=body.housing_type, - budget=body.budget, - target_gain=gain, - funding=funding - ) - - # Given the solutions we select the optimal one - solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] - ) - - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) - solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) - - if solutions["meets_upgrade_target"].any(): - # If we have a solution that meets the upgrade target, we select that one - optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] - else: - # Pick the cheapest - optimal_solution = solutions.iloc[0] - - # This is the list of measures that we will recommend - scheme = optimal_solution["scheme"] - funded_measures = optimal_solution["items"] if scheme != "none" else [] - solution = optimal_solution["items"] + 
optimal_solution["unfunded_items"] - # This is the total amount of funding that the project will produce (including uplifts) (£) - project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ - optimal_solution["partial_project_funding"] - # This is the total amount of funding associated to the uplift (£) - total_uplift = optimal_solution["total_uplift"] - # This is the funding scheme selected - # This is the full project ABS - full_project_score = optimal_solution["project_score"] - # This is the partial project ABS - partial_project_score = optimal_solution["partial_project_score"] - # This is the uplift score ABS - uplift_project_score = optimal_solution["total_uplift_score"] - else: - # We optimise and then we determine eligibility for funding, based on the measures selected - optimiser = ( - GainOptimiser( - input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False - ) if body.budget else CostOptimiser(input_measures, min_gain=gain) - ) - optimiser.setup() - optimiser.solve() - solution = optimiser.solution - - recommendation_types = [] - for measures in input_measures: - for measure in measures: - recommendation_types.append(measure["type"]) - recommendation_types = set(recommendation_types) - - has_wall_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - WALL_INSULATION_MEASURES - ) - has_roof_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - ROOF_INSULATION_MEASURES - ) - - funding.check_funding( - measures=solution, - starting_sap=p.data["current-energy-efficiency"], - ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), - floor_area=p.floor_area, - mainheat_description=p.main_heating["clean_description"], - heating_control_description=p.main_heating_controls["clean_description"], - is_cavity=p.walls["is_cavity_wall"], - 
current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - has_wall_insulation_recommendation=has_wall_insulation_recommendation, - has_roof_insulation_recommendation=has_roof_insulation_recommendation, - ) - - # Determine the scheme - scheme = "none" - if funding.eco4_eligible: - scheme = "eco4" - if scheme == "none" and funding.gbis_eligible: - scheme = "gbis" - - funded_measures = solution if scheme in ["gbis", "eco4"] else [] - project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs - total_uplift = funding.eco4_uplift - full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs - partial_project_score = funding.partial_project_abs - uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift - - selected = {r["id"] for r in solution} - - if property_required_measures: - solution = optimiser_functions.add_required_measures( - property_id=p.id, property_required_measures=property_required_measures, - recommendations=recommendations, selected=selected, - ) - - # Add best practice measures (ventilation/trickle vents) - selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) - # Final flattening - Don't do this! 
- # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - # p.id, recommendations, selected - # ) - - # TODO: functionise - for measure in funded_measures: - if "+mechanical_ventilation" in measure["type"]: - measure["type"] = measure["type"].split("+mechanical_ventilation")[0] - - p.insert_funding( - scheme=scheme, - funded_measures=funded_measures, - project_funding=project_funding, - total_uplift=total_uplift, - full_project_score=full_project_score, - partial_project_score=partial_project_score, - uplift_project_score=uplift_project_score - ) +# for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: +# if pd.isnull(p.data[col]): +# min_diff = abs( +# (costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"]) +# ).min() +# df = costs_by_floor_area[ +# abs((costs_by_floor_area["current-energy-efficiency"] - p.data[ +# "current-energy-efficiency"])) == min_diff +# ] +# if df.shape[0] > 1: +# df = df.head(1) +# p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0] +# +# [ +# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in +# input_properties +# ] +# # for p in input_properties: +# # p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) +# +# # Run the recommendations +# recommendations = {} +# recommendations_scoring_data = [] +# representative_recommendations = {} +# for p in tqdm(input_properties): +# if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]): +# p.data["built-form"] = "Semi-Detached" +# recommender = Recommendations( +# property_instance=p, +# materials=materials, +# exclusions=[], +# inclusions=[], +# default_u_values=True +# ) +# property_recommendations, property_representative_recommendations = recommender.recommend() +# +# if not property_recommendations: +# continue +# +# recommendations[p.id] = 
property_recommendations +# representative_recommendations[p.id] = property_representative_recommendations +# +# p.create_base_difference_epc_record(cleaned_lookup=cleaned) +# p.adjust_difference_record_with_recommendations( +# property_recommendations, property_representative_recommendations +# ) +# +# recommendations_scoring_data.extend(p.recommendations_scoring_data) +# +# recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) +# recommendations_scoring_data = recommendations_scoring_data.drop( +# columns=[ +# "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", +# "carbon_ending" +# ] +# ) +# +# model_predictions_mocked = { +# "sap_change_predictions": None, +# "heat_demand_predictions": None, +# "carbon_change_predictions": None, +# "heating_kwh_predictions": None, +# "hotwater_kwh_predictions": None, +# } +# +# for k in model_predictions_mocked.keys(): +# model_predictions_mocked[k] = recommendations_scoring_data[["id"]].copy() +# model_predictions_mocked[k][['property_id', 'recommendation_id']] = ( +# model_predictions_mocked[k]['id'].str.split('+', expand=True) +# ) +# model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply( +# ModelApi.extract_phase) +# +# if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]: +# model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000), +# k=len(recommendations_scoring_data)) +# continue +# +# model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True) +# preds = [] +# for p_id in model_predictions_mocked[k]["property_id"].unique(): +# # We add some amount each time +# p = [p for p in input_properties if str(p.id) == p_id][0] +# if k == "sap_change_predictions": +# start = p.data["current-energy-efficiency"] +# elif k == "heat_demand_predictions": +# start = p.data["energy-consumption-current"] +# else: +# start = p.data["co2-emissions-current"] +# 
df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy() +# # Add some amount each time +# to_add = random.choices(range(0, 15), k=len(df)) +# to_add = np.cumsum(to_add) +# df["predictions"] = start + to_add +# preds.append(df) +# preds = pd.concat(preds) +# model_predictions_mocked[k] = preds +# +# for property_id in tqdm(recommendations.keys(), total=len(recommendations)): +# property_instance = [p for p in input_properties if p.id == property_id][0] +# +# recommendations_with_impact, impact_summary = ( +# Recommendations.calculate_recommendation_impact( +# property_instance=property_instance, +# all_predictions=model_predictions_mocked, +# recommendations=recommendations, +# representative_recommendations=representative_recommendations +# ) +# ) +# +# # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc +# # at each phase +# property_instance.update_simulation_epcs(impact_summary) +# recommendations[property_id] = recommendations_with_impact +# +# for property_id in tqdm([p.id for p in input_properties]): +# property_recommendations = recommendations.get(property_id, []) +# property_instance = [p for p in input_properties if p.id == property_id][0] +# +# property_current_energy_bill = ( +# Recommendations.calculate_recommendation_tenant_savings( +# property_instance=property_instance, +# kwh_simulation_predictions=model_predictions_mocked, +# property_recommendations=property_recommendations, +# ashp_cop=2.8 +# ) +# ) +# property_instance.current_energy_bill = property_current_energy_bill +# +# body = PlanTriggerRequest( +# **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0, +# 'trigger_file_path': '', 'already_installed_file_path': '', +# 'patches_file_path': None, 'non_invasive_recommendations_file_path': None, +# 'valuation_file_path': '', +# 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None, +# 
'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8, +# 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None, +# 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} +# ) +# +# for p in tqdm(input_properties): +# if not recommendations.get(p.id): +# continue +# +# # we need to double unlist because we have a list of lists +# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} +# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] +# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] +# +# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore +# # its inclusion +# needs_ventilation = any( +# x in property_measure_types for x in assumptions.measures_needing_ventilation +# ) and not p.has_ventilation +# +# if not measures_to_optimise: +# # Nothing to do, we just reshape the recommendations +# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# p.id, recommendations, set() +# ) +# continue +# +# fixed_gain = optimiser_functions.calculate_fixed_gain( +# property_required_measures, recommendations, p, needs_ventilation +# ) +# gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) +# +# funding = Funding( +# tenure="Social", +# project_scores_matrix=project_scores_matrix, +# partial_project_scores_matrix=partial_project_scores_matrix, +# whlg_eligible_postcodes=whlg_eligible_postcodes, +# eco4_social_cavity_abs_rate=12.5, +# eco4_social_solid_abs_rate=17, +# eco4_private_cavity_abs_rate=12.5, +# eco4_private_solid_abs_rate=17, +# gbis_social_cavity_abs_rate=21, +# gbis_social_solid_abs_rate=25, +# gbis_private_cavity_abs_rate=21, +# gbis_private_solid_abs_rate=28, +# ) +# +# li_thickness = convert_thickness_to_numeric( 
+# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] +# ) +# current_wall_u_value = p.walls["thermal_transmittance"] +# if current_wall_u_value is None: +# current_wall_u_value = get_wall_u_value( +# clean_description=p.walls["clean_description"], +# age_band=p.age_band, +# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], +# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], +# ) +# +# # We insert the innovation uplift +# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) +# +# # TODO: Turn this into a function and store the innovaiton uplift +# for group in measures_to_optimise_with_uplift: +# for r in group: +# +# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", +# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: +# ( +# r["partial_project_score"], +# r["partial_project_funding"], +# r["innovation_uplift"], +# r["uplift_project_score"], +# ) = ( +# 0, 0, 0, 0 +# ) +# continue +# +# ( +# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], +# r["uplift_project_score"] +# ) = funding.get_innovation_uplift( +# measure=r, +# starting_sap=p.data["current-energy-efficiency"], +# floor_area=p.floor_area, +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# ) +# +# input_measures = optimiser_functions.prepare_input_measures( +# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True +# ) +# +# # When the goal is Increasing EPC, we can run the funding optimiser +# if body.goal == "Increasing EPC": +# +# solutions = optimise_with_funding_paths( +# p=p, +# input_measures=input_measures, +# housing_type=body.housing_type, +# budget=body.budget, +# 
target_gain=gain, +# funding=funding +# ) +# +# # Given the solutions we select the optimal one +# solutions["cost_less_full_project_funding"] = np.where( +# solutions["scheme"] == "eco4", +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], +# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] +# ) +# +# solutions["cost_less_full_project_funding"] = ( +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] +# ) +# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) +# +# if solutions["meets_upgrade_target"].any(): +# # If we have a solution that meets the upgrade target, we select that one +# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] +# else: +# # Pick the cheapest +# optimal_solution = solutions.iloc[0] +# +# # This is the list of measures that we will recommend +# scheme = optimal_solution["scheme"] +# funded_measures = optimal_solution["items"] if scheme != "none" else [] +# solution = optimal_solution["items"] + optimal_solution["unfunded_items"] +# # This is the total amount of funding that the project will produce (including uplifts) (£) +# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ +# optimal_solution["partial_project_funding"] +# # This is the total amount of funding associated to the uplift (£) +# total_uplift = optimal_solution["total_uplift"] +# # This is the funding scheme selected +# # This is the full project ABS +# full_project_score = optimal_solution["project_score"] +# # This is the partial project ABS +# partial_project_score = optimal_solution["partial_project_score"] +# # This is the uplift score ABS +# uplift_project_score = optimal_solution["total_uplift_score"] +# else: +# # We optimise and then we determine eligibility for funding, based on the measures selected +# optimiser = ( +# GainOptimiser( +# input_measures, 
max_cost=body.budget, max_gain=gain, allow_slack=False +# ) if body.budget else CostOptimiser(input_measures, min_gain=gain) +# ) +# optimiser.setup() +# optimiser.solve() +# solution = optimiser.solution +# +# recommendation_types = [] +# for measures in input_measures: +# for measure in measures: +# recommendation_types.append(measure["type"]) +# recommendation_types = set(recommendation_types) +# +# has_wall_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# WALL_INSULATION_MEASURES +# ) +# has_roof_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# ROOF_INSULATION_MEASURES +# ) +# +# funding.check_funding( +# measures=solution, +# starting_sap=p.data["current-energy-efficiency"], +# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), +# floor_area=p.floor_area, +# mainheat_description=p.main_heating["clean_description"], +# heating_control_description=p.main_heating_controls["clean_description"], +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# has_wall_insulation_recommendation=has_wall_insulation_recommendation, +# has_roof_insulation_recommendation=has_roof_insulation_recommendation, +# ) +# +# # Determine the scheme +# scheme = "none" +# if funding.eco4_eligible: +# scheme = "eco4" +# if scheme == "none" and funding.gbis_eligible: +# scheme = "gbis" +# +# funded_measures = solution if scheme in ["gbis", "eco4"] else [] +# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs +# total_uplift = funding.eco4_uplift +# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs 
+# partial_project_score = funding.partial_project_abs +# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift +# +# selected = {r["id"] for r in solution} +# +# if property_required_measures: +# solution = optimiser_functions.add_required_measures( +# property_id=p.id, property_required_measures=property_required_measures, +# recommendations=recommendations, selected=selected, +# ) +# +# # Add best practice measures (ventilation/trickle vents) +# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) +# # Final flattening - Don't do this! +# # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# # p.id, recommendations, selected +# # ) +# +# # TODO: functionise +# for measure in funded_measures: +# if "+mechanical_ventilation" in measure["type"]: +# measure["type"] = measure["type"].split("+mechanical_ventilation")[0] +# +# p.insert_funding( +# scheme=scheme, +# funded_measures=funded_measures, +# project_funding=project_funding, +# total_uplift=total_uplift, +# full_project_score=full_project_score, +# partial_project_score=partial_project_score, +# uplift_project_score=uplift_project_score +# ) diff --git a/backend/tests/test_search_epc.py b/backend/tests/test_search_epc.py index 9bb7c39a..a0fef7e9 100644 --- a/backend/tests/test_search_epc.py +++ b/backend/tests/test_search_epc.py @@ -26,7 +26,7 @@ class TestSearchEpcIntegration: # Test case 2: Another valid address and postcode # In this case, the newest EPC, does not have a uprn associated to it. If we did a search by # uprn, we would get an old EPC - ("Flat 8, Hainton House", "DN32 9AQ", 10090082018, True, + ("Flat 8, Hainton House", "DN32 9AQ", "", True, "bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 2), # Test case 3: When we make a request to the API for this property, we get back results for # flats 1, 2 and 3. 
We have some logic to handle the response so that we get back flat 1 @@ -56,7 +56,6 @@ class TestSearchEpcIntegration: # We check that we have the correct epc assert epc_searcher.newest_epc["lmk-key"] == lmk_key - assert epc_searcher.newest_epc["uprn"] == uprn assert len(epc_searcher.older_epcs) == n_old_epcs def test_search_housenumber(self): diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index df5cc2e1..e81aac69 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -144,6 +144,15 @@ class DummyProp: self.has_ventilation = False self.floor_area = 70.0 self.main_heating_controls = {"clean_description": "time and temperature zone control"} + self.walls = {'original_description': 'Solid brick, as built, no insulation (assumed)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': True, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, + 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, + 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False} self.main_heating = { 'original_description': 'Boiler and radiators, mains gas', @@ -230,6 +239,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 19090.810139104888, 'labour_hours': 0.0, 'labour_days': 0.0}], 'type': 'external_wall_insulation', 'measure_type': 'external_wall_insulation', + "innovation_rate": 0, 'description': 'Install 150mm EWI Pro EPS external wall insulation system with Brick ' 'Slip finish on external walls', 'starting_u_value': 1.7, 'new_u_value': 0.32, 'already_installed': False, @@ -258,6 +268,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 5694.929118083911, 'labour_hours': 134.37473199973275, 'labour_days': 4.199210374991648}], 'type': 
'internal_wall_insulation', 'measure_type': 'internal_wall_insulation', + "innovation_rate": 0, 'description': 'Install 95mm ' 'SWIP EcoBatt & ' 'Plastered ' @@ -314,6 +325,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 645.0, 'labour_hours': 8, 'labour_days': 1}], 'type': 'loft_insulation', 'measure_type': 'loft_insulation', + "innovation_rate": 0, 'description': 'Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft', 'starting_u_value': 2.3, 'new_u_value': 2.3, 'sap_points': np.float64(2.4), 'already_installed': False, @@ -338,6 +350,7 @@ def property_recommendations(): 'plant_cost': 0.0, 'total_cost': 350.0, 'notes': None, 'is_installer_quote': True, 'total': 700.0, 'quantity': 2, 'quantity_unit': 'part'}], 'type': 'mechanical_ventilation', 'measure_type': 'mechanical_ventilation', + "innovation_rate": 0, 'description': 'Install 2 ' 'Mechanical ' 'Extract ' @@ -387,6 +400,7 @@ def property_recommendations(): 'labour_hours': 70.08999999999999, 'labour_days': 2.920416666666666}], 'type': 'suspended_floor_insulation', 'measure_type': 'suspended_floor_insulation', + "innovation_rate": 0, 'description': 'Install 75mm Q-bot underfloor insulation insulation in suspended ' 'floor', 'starting_u_value': 0.83, 'new_u_value': 0.22, 'sap_points': 2, 'survey': True, @@ -401,6 +415,7 @@ def property_recommendations(): 'energy_cost_savings': np.float64(76.04936470588231)}], [ {'phase': 4, 'parts': [], 'type': 'low_energy_lighting', 'measure_type': 'low_energy_lighting', + "innovation_rate": 0, 'description': 'Install low energy lighting in -886 outlets', 'starting_u_value': None, 'new_u_value': None, 'already_installed': False, 'sap_points': 2, 'kwh_savings': -48508.5, 'energy_cost_savings': -12481.237049999998, @@ -413,6 +428,7 @@ def property_recommendations(): 'recommendation_id': '5_phase=4', 'efficiency': -1705.5500000000002, 'heat_demand': np.float64(5.099999999999994)}], [ {'type': 'heating', 'phase': 5, 'measure_type': 
'time_temperature_zone_control', + "innovation_rate": 0, 'parts': [], 'description': 'Upgrade heating controls to Smart Thermostats, room sensors and ' 'smart radiator valves (time & temperature zone control)', @@ -431,6 +447,7 @@ def property_recommendations(): 'energy_cost_savings': np.float64(65.29581176470589)}], [ {'phase': 6, 'parts': [], 'type': 'secondary_heating', 'measure_type': 'secondary_heating', + "innovation_rate": 0, 'description': 'Remove the secondary heating system', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(3.6), 'already_installed': False, 'total': 30.0, 'subtotal': 25.0, 'vat': 5.0, 'labour_hours': 3.0, @@ -443,6 +460,7 @@ def property_recommendations(): 'kwh_savings': np.float64(196.29999999999927), 'energy_cost_savings': np.float64(14.61857647058821)}], [ {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 4.0 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(13.0), 'already_installed': False, 'total': 6013.139999999999, 'subtotal': 5010.95, 'vat': 0, @@ -455,6 +473,7 @@ def property_recommendations(): 'kwh_savings': np.float64(2040.8566307499998), 'energy_cost_savings': np.float64(525.1124110919749)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 4.0 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(13.0), 'already_installed': False, 'total': 10537.008, 'subtotal': 8780.84, 'vat': 0, @@ -467,6 +486,7 @@ def property_recommendations(): 'kwh_savings': np.float64(2857.1992830499994), 'energy_cost_savings': np.float64(735.1573755287648)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.6 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': 
None, 'new_u_value': None, 'sap_points': np.float64(12.0), 'already_installed': False, 'total': 5826.491999999999, 'subtotal': 4855.41, 'vat': 0, @@ -478,6 +498,7 @@ def property_recommendations(): 'heat_demand': np.float64(83.69999999999999), 'kwh_savings': np.float64(1846.33397), 'energy_cost_savings': np.float64(475.0617304809999)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.6 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(12.0), 'already_installed': False, 'total': 10350.359999999999, 'subtotal': 8625.3, 'vat': 0, @@ -489,6 +510,7 @@ def property_recommendations(): 'heat_demand': np.float64(83.69999999999999), 'kwh_savings': np.float64(2584.867558), 'energy_cost_savings': np.float64(665.0864226734)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.2 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(11.0), 'already_installed': False, 'total': 5642.604, 'subtotal': 4702.17, 'vat': 0, @@ -500,6 +522,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1650.2708274), 'energy_cost_savings': np.float64(424.61468389001993)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.2 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(11.0), 'already_installed': False, 'total': 10166.472, 'subtotal': 8472.06, 'vat': 0, @@ -511,6 +534,7 @@ def property_recommendations(): 'heat_demand': np.float64(78.3), 'kwh_savings': np.float64(2310.3791583599996), 'energy_cost_savings': np.float64(594.4605574460278)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 
'Install a 2.8 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(9.0), 'already_installed': False, 'total': 5458.727999999999, 'subtotal': 4548.94, 'vat': 0, @@ -522,6 +546,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1453.5933906), 'energy_cost_savings': np.float64(374.00957940138)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.8 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(9.0), 'already_installed': False, 'total': 9982.596, 'subtotal': 8318.83, 'vat': 0, @@ -533,6 +558,7 @@ def property_recommendations(): 'heat_demand': np.float64(64.0), 'kwh_savings': np.float64(2035.03074684), 'energy_cost_savings': np.float64(523.6134111619319)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.4 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(8.0), 'already_installed': False, 'total': 5274.852, 'subtotal': 4395.71, 'vat': 0, @@ -544,6 +570,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1255.12594), 'energy_cost_savings': np.float64(322.94390436199996)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.4 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(8.0), 'already_installed': False, 'total': 9798.72, 'subtotal': 8165.6, 'vat': 0, @@ -555,6 +582,7 @@ def property_recommendations(): 'heat_demand': np.float64(54.3), 'kwh_savings': np.float64(1757.1763159999998), 'energy_cost_savings': np.float64(452.1214661067999)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 
'description': 'Install a 2.0 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(7.0), 'already_installed': False, 'total': 5090.976, 'subtotal': 4242.48, 'vat': 0, @@ -566,6 +594,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1048.341318), 'energy_cost_savings': np.float64(269.7382211214)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.0 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(7.0), 'already_installed': False, 'total': 9614.844, 'subtotal': 8012.369999999999, 'vat': 0, @@ -586,10 +615,20 @@ def _attach_costs_and_uplifts(recs, funding, p): for group in out: for r in group: if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating"]: - r["innovation_uplift"] = 0 + ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) continue - r["uplift"] = 0.0 # fixed for determinism in test - r["innovation_uplift"] = funding.get_innovation_uplift( + + ( + r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"] + ) = funding.get_innovation_uplift( measure=r, starting_sap=55, floor_area=70.0, @@ -663,3 +702,100 @@ def test_social_fabric_only_returns_only_fabric_types(p, funding, property_recom unfunded_rows = solutions[ solutions["path"].apply(lambda x: isinstance(x, dict) and x.get("reference") == "unfunded:all")] assert not unfunded_rows.empty + + +def test_private_solid_wall_no_innovation_epc_d(p, funding, mock_project_scores_matrix, mock_partial_scores_matrix): + """ + We have a specific test for this case which was implemented incorrectly originally. + This is an EPC D property and so shouldn't be eligible for ECO4. Instead, only GBIS should be considered. 
+ """ + + # Overwrite the data - copied from real example + p2 = deepcopy(p) + p2.data = { + "current-energy-rating": "D", + "current-energy-efficiency": 68, + "mainheat-energy-eff": "Good", + } + p2.walls = {'original_description': 'Sandstone or limestone, as built, no insulation (assumed)', + 'clean_description': 'Sandstone or limestone, as built, no insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, + 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': True, + 'is_sandstone_or_limestone': True, 'is_park_home': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False} + + funding2 = Funding( + tenure="Private", + project_scores_matrix=mock_project_scores_matrix, + partial_project_scores_matrix=mock_partial_scores_matrix, + whlg_eligible_postcodes=pd.DataFrame([{"Postcode": "ab12cd"}]), + eco4_social_cavity_abs_rate=12.5, + eco4_social_solid_abs_rate=17, + eco4_private_cavity_abs_rate=12.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=21, + gbis_private_solid_abs_rate=28, + ) + + input_measures = [ + [{'id': '0_phase=0', 'cost': np.float64(4441.202499013676), 'gain': np.float64(3.4000000000000057), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(4441.202499013676), 'raw_cost': 3881.2024990136756, + 'partial_project_funding': np.float64(2300.1000000000004), 'partial_project_score': np.float64(135.3), + 'uplift_project_score': np.float64(0.0)}], [ + {'id': '2_phase=2', 'cost': np.float64(2280.0), 'gain': np.float64(0.4), 'type': 'secondary_glazing', + 'innovation_uplift': np.float64(0.0), 'cost_minus_uplift': np.float64(2280.0), + 'raw_cost': np.float64(2280.0), 
'partial_project_funding': np.float64(1421.1999999999998), + 'partial_project_score': np.float64(83.6), 'uplift_project_score': np.float64(0.0)}], [ + {'id': '3_phase=3', 'cost': np.float64(604.5840000000001), 'gain': np.float64(1.2), + 'type': 'time_temperature_zone_control', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(604.5840000000001), 'raw_cost': 604.5840000000001, + 'partial_project_funding': np.float64(702.0999999999999), 'partial_project_score': np.float64(41.3), + 'uplift_project_score': np.float64(0.0)}], [ + {'id': '4_phase=4', 'cost': 60.0, 'gain': np.float64(0.0), 'type': 'secondary_heating', + 'innovation_uplift': 0, 'cost_minus_uplift': 60.0, 'raw_cost': 60.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0}] + ] + + solutions = optimise_with_funding_paths( + p=p2, + input_measures=input_measures, + housing_type="Private", + budget=None, + target_gain=1.5, + funding=funding2 + ) + + # 3) basic shape assertions + assert isinstance(solutions, pd.DataFrame) + assert not solutions.empty + + # We should have 2 rows + assert solutions.shape[0] == 2 + + # We should only have None or GBIS + assert set(solutions["scheme"].unique()) == {"none", "gbis"} + + meets_upgrade_gbis = solutions[solutions["meets_upgrade_target"] & solutions["is_eligible"]] + assert meets_upgrade_gbis.shape[0] == 1 + + # Check exact result + assert meets_upgrade_gbis.squeeze().to_dict() == { + 'fixed_ids': ['0_phase=0'], 'items': [ + {'id': '0_phase=0', 'cost': 3881.2024990136756, 'gain': np.float64(3.4000000000000057), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(4441.202499013676), 'raw_cost': 3881.2024990136756, + 'partial_project_funding': np.float64(2300.1000000000004), 'partial_project_score': np.float64(135.3), + 'uplift_project_score': np.float64(0.0)}], 'total_cost': 3881.2024990136756, + 'total_gain': 3.4000000000000057, 
'path': [{'AND': ['internal_wall_insulation+mechanical_ventilation'], + 'reference': + 'internal_wall_insulation+mechanical_ventilation:gbis'}], + 'scheme': 'gbis', 'is_eligible': True, 'unfunded_items': [], 'meets_upgrade_target': True, 'starting_sap': 68, + 'floor_area': 70.0, 'ending_sap': 71.4, 'starting_band': 'High_D', 'ending_band': 'Low_C', + 'floor_area_band': '0-72', 'project_score': 540.0, 'full_project_funding': 0.0, + 'partial_project_funding': 2300.1000000000004, 'partial_project_score': 135.3, 'total_uplift': 0.0, + 'total_uplift_score': 0.0 + } From a8905f442ee41d90c619b1d405fc0769b0e44f8d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 19:28:26 +0000 Subject: [PATCH 021/202] fixed funding test cases --- .../tests/test_optimiser_functions.py | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 430acaa8..031bb9ac 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -12,7 +12,10 @@ class TestPrepareInputMeasures: recs = [ [ # loft insulation measure {"recommendation_id": "loft1", "type": "loft_insulation", "total": 100, "kwh_savings": 200, - "energy_cost_savings": 10, "has_battery": False, "measure_type": "loft_insulation"}, + "energy_cost_savings": 10, "has_battery": False, "measure_type": "loft_insulation", + "partial_project_funding": 0, "partial_project_score": 0, + "uplift_project_score": 0, + }, ], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) @@ -27,9 +30,12 @@ class TestPrepareInputMeasures: ["internal_wall_insulation"]) recs = [ [{"recommendation_id": "wall1", "type": "internal_wall_insulation", "total": 500, "kwh_savings": 300, - "energy_cost_savings": 5, "has_battery": False, "measure_type": "internal_wall_insulation"}], + 
"energy_cost_savings": 5, "has_battery": False, "measure_type": "internal_wall_insulation", + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, + }], [{"recommendation_id": "vent1", "type": "mechanical_ventilation", "total": 50, "kwh_savings": 30, - "energy_cost_savings": 5, "has_battery": False, "measure_type": "mechanical_ventilation"}], + "energy_cost_savings": 5, "has_battery": False, "measure_type": "mechanical_ventilation", + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=True) wall_option = measures[0][0] @@ -40,7 +46,8 @@ class TestPrepareInputMeasures: def test_filters_out_negative_cost_savings(self): recs = [ [{"recommendation_id": "bad1", "type": "loft_insulation", "total": 200, "kwh_savings": 100, - "energy_cost_savings": -5, "has_battery": False}], + "energy_cost_savings": -5, "has_battery": False, + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) assert measures == [] # should skip negative cost saving recs @@ -149,14 +156,14 @@ class TestIncreasingEpcE2e: @pytest.fixture def setup_case(self): - # ✅ Dummy property object + # Dummy property object p = SimpleNamespace( id="P1", has_ventilation=False, data={"current-energy-efficiency": "52"}, ) - # ✅ Dummy request body + # Dummy request body body = SimpleNamespace( goal="Increasing EPC", goal_value="C", @@ -165,9 +172,6 @@ class TestIncreasingEpcE2e: simulate_sap_10=False, required_measures=[] ) - - # ✅ Use your massive measures_to_optimise list - recommendations = {"P1": measures_to_optimise} return p, body, recommendations @@ -190,6 +194,18 @@ class TestIncreasingEpcE2e: assert needs_ventilation + # Input the various things we need - set all to 0 + for group in 
measures_to_optimise: + for r in group: + ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) + input_measures = optimiser_functions.prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation) assert input_measures, "Expected measures to optimise" From 2aecf27900cfd2d280ce6e342b08e913528d7385 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 10:51:34 +0000 Subject: [PATCH 022/202] debugging funding optimiser for existing gshp - remove ashp and hhrsh recommendations when gshp in place --- backend/Funding.py | 23 +++- backend/app/db/models/inspections.py | 163 ++++++++++++++++++++++++++ backend/tests/test_funding.py | 82 +++++++++++++ recommendations/HeatingRecommender.py | 7 +- 4 files changed, 269 insertions(+), 6 deletions(-) create mode 100644 backend/app/db/models/inspections.py diff --git a/backend/Funding.py b/backend/Funding.py index d590474c..ece8e3cf 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -1,11 +1,14 @@ from enum import Enum from typing import List import pandas as pd +from utils.logger import setup_logger from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from backend.app.plan.schemas import VALID_HOUSING_TYPES, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, \ MEASURE_MAP +logger = setup_logger(__name__) + class EligibilityCaveats(Enum): EPC_RATING = "epc_rating" # EPC requirements not met @@ -637,13 +640,25 @@ class Funding: if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: return 0 - pps = filtered_pps_matrix[ - (filtered_pps_matrix["Pre_Main_Heating_Source"] == pre_heating_system) & - (filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP") & - (filtered_pps_matrix["Measure_Type"] == "B_Upgrade_nopreHCs") + pps_data = filtered_pps_matrix[ + filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP" + ] + 
+ if pre_heating_system not in pps_data["Pre_Main_Heating_Source"].values: + logger.info( + f"No PPS data for ASHP upgrade from {pre_heating_system}, returning 0" + ) + return 0 + + pps = pps_data[ + (pps_data["Pre_Main_Heating_Source"] == pre_heating_system) & + (pps_data["Measure_Type"] == "B_Upgrade_nopreHCs") # We assume we'll be making a heating system upgrade ] + # Not every pre heating system will result in PPS, e.g. a ground source heat pump to ASHP upgrade + # won't have a PPS. + if pps.shape[0] != 1: raise ValueError("something went wrong, more than one pps for ashp") return pps.squeeze()["Cost Savings"] diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py new file mode 100644 index 00000000..c9925a2a --- /dev/null +++ b/backend/app/db/models/inspections.py @@ -0,0 +1,163 @@ +import enum +import pytz +import datetime +from sqlalchemy import ( + Column, + BigInteger, + Text, + DateTime, + Enum, + ForeignKey, +) +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +# ------------------------------------------------------------------- +# ENUM DEFINITIONS (equivalent to drizzle pgEnum calls) +# ------------------------------------------------------------------- + +class InspectionArchetype(enum.Enum): + BUNGALOW = "Bungalow" + FLAT = "Flat" + MAISONETTE = "Maisonette" + HOUSE = "House" + NON_DOMESTIC = "non-domestic" + + +class InspectionArchetype2(enum.Enum): + DETACHED = "detached" + MID_TERRACE = "mid-terrace" + ENCLOSED_MID_TERRACE = "enclosed mid-terrace" + END_TERRACE = "end-terrace" + ENCLOSED_END_TERRACE = "enclosed end-terrace" + SEMI_DETACHED = "semi-detached" + + +class InspectionsWallConstruction(enum.Enum): + CAVITY = "cavity" + SOLID = "solid" + SYSTEM_BUILT = "system built" + TIMBER_FRAMED = "timber framed" + STEEL_FRAMED = "steel framed" + RE_WALLED_CAVITY = "re-walled cavity" + MANSARD_PRE_FAB = "mansard pre-fab" + MANSARD_EWI = "mansard ewi" + MANSARD_RE_WALLED = 
"mansard re-walled" + + +class InspectionsWallInsulation(enum.Enum): + EMPTY_CAVITY = "empty cavity" + FILLED_AT_BUILD = "filled at build" + PARTIAL = "partial" + RETRO_DRILLED = "retro drilled" + EWI = "ewi" + IWI = "iwi" + SOLID_NON_CAVITY = "solid non-cavity" + SYSTEM_BUILT = "system built" + TIMBER_FRAMED = "timber framed" + STEEL_FRAMED = "steel framed" + + +class InspectionsInsulationMaterial(enum.Enum): + EMPTY_50_90 = "empty 50-90" + EMPTY_100_PLUS = "empty 100+" + EMPTY_30_40 = "empty 30-40" + EMPTY_LESS_THAN_30 = "empty less than 30" + LOOSE_FIBRE_WOOL = "loose fibre/wool" + EPS_CELO_KING = "eps/celo/king" + FIBRE_BATTS_WITH_CAVITY = "fibre batts - with cavity" + FIBRE_BATTS_NO_CAVITY = "fibre batts - no cavity" + LOOSE_BEAD = "loose bead" + GLUED_BEAD = "glued bead" + FORMALDEHYDE = "formaldehyde" + BUBBLE_WRAP = "bubble wrap" + POLY_CHUNKS = "poly chunks" + + +class InspectionBorescoped(enum.Enum): + YES = "yes" + NO = "no" + REFUSED = "refused" + + +class InspectionsRoofOrientation(enum.Enum): + NORTH = "north" + EAST = "east" + SOUTH = "south" + WEST = "west" + NORTH_EAST = "north-east" + NORTH_WEST = "north-west" + SOUTH_EAST = "south-east" + SOUTH_WEST = "south-west" + N_S_SPLIT = "n/s split" + E_W_SPLIT = "e/w split" + NE_SW_SPLIT = "ne/sw split" + NW_SE_SPLIT = "nw/se split" + FLAT_ROOF = "flat roof" + NO_ROOF = "no roof" + ROOF_TOO_SMALL = "roof too small" + ALREADY_HAS_SOLAR_PV = "already has solar pv" + + +class InspectionsTileHung(enum.Enum): + YES = "yes" + NO = "no" + FIRST_FLOOR_FLATS_TILE_HUNG = "first floor flats are tile hung" + + +class InspectionsRendered(enum.Enum): + NO_RENDER = "no render" + INSUFFICIENT_DPC_SPACE = "rendered with “insufficient” space between dpc and render" + SUFFICIENT_DPC_SPACE = "rendered with “sufficient” space between dpc and render" + + +class InspectionsCladding(enum.Enum): + NONE = "none" + SUFFICIENT_SPACE = "cladded with “sufficient space to fill the wall”" + INSUFFICIENT_SPACE = "cladded with 
“insufficient space to fill the wall”" + + +class InspectionsAccessIssues(enum.Enum): + SEE_NOTES = "see notes" + DAMP_ISSUES = "damp issues" + FOLIAGE_ON_WALLS = "foliage on walls" + BUSHES_AGAINST_WALL = "bushes against wall" + TREES_AROUND_ABOVE = "trees around/anove property" + HIGH_RISE = "high rise block flats/maisonettes" + CONSERVATORY = "conservatory" + LEAN_TO = "lean-to" + GARAGE = "garage" + EXTENSION = "extension" + DECKING = "decking" + SHED_AGAINST_WALL = "shed against wall" + + +class InspectionModel(Base): + __tablename__ = "inspections" + + id = Column(BigInteger, primary_key=True, autoincrement=True) + property_id = Column(BigInteger, ForeignKey("property.id"), nullable=False) + + archetype = Column(Enum(InspectionArchetype), nullable=True) + archetype_2 = Column(Enum(InspectionArchetype2), nullable=True) + wall_construction = Column(Enum(InspectionsWallConstruction), nullable=True) + insulation = Column(Enum(InspectionsWallInsulation), nullable=True) + insulation_material = Column(Enum(InspectionsInsulationMaterial), nullable=True) + borescoped = Column(Enum(InspectionBorescoped), nullable=True) + roof_orientation = Column(Enum(InspectionsRoofOrientation), nullable=True) + tile_hung = Column(Enum(InspectionsTileHung), nullable=True) + rendered = Column(Enum(InspectionsRendered), nullable=True) + cladding = Column(Enum(InspectionsCladding), nullable=True) + access_issues = Column(Enum(InspectionsAccessIssues), nullable=True) + + notes = Column(Text) + surveyor_name = Column(Text) + + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + uploaded_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py index d84480ce..513c3271 100644 --- a/backend/tests/test_funding.py +++ b/backend/tests/test_funding.py @@ -1393,3 +1393,85 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift 
assert funding.eco4_uplift and funding.eco4_uplift > 0 # And total funding should include that uplift assert funding.eco4_funding and funding.eco4_funding > 0 + + +def test_existing_gshp_to_ashp(): + r = {'phase': 3, 'parts': [], 'type': 'heating', 'measure_type': 'air_source_heat_pump', + 'description': 'Install a 5KW air source heat pump, and upgrade heating controls to Smart Thermostats, ' + 'room sensors and smart radiator valves (time & temperature zone control). Ensure you have a ' + 'single tariff', + 'starting_u_value': None, 'new_u_value': None, 'sap_points': 7.7, 'already_installed': False, + 'simulation_config': {'mainheat_energy_eff_ending': 'Good', 'hot_water_energy_eff_ending': 'Average', + 'has_air_source_heat_pump_ending': True, 'has_ground_source_heat_pump_ending': False, + 'extra_features_ending': None, + 'thermostatic_control_ending': 'time and temperature zone control', + 'switch_system_ending': None, 'multiple_room_thermostats_ending': False, + 'mainheatc_energy_eff_ending': 'Very Good'}, + 'description_simulation': {'mainheat-description': 'Air source heat pump, radiators, electric', + 'mainheat-energy-eff': 'Good', 'hot-water-energy-eff': 'Average', + 'hotwater-description': 'From main system', + 'mainheatcont-description': 'Time and temperature zone control', + 'mainheatc-energy-eff': 'Very Good'}, 'total': 13188.996000000001, + 'contingency': 3145.8150000000005, 'contingency_rate': 0.35, 'vat': 2080.666, 'labour_hours': 44.7, + 'labour_days': 6.0, 'innovation_rate': 0, 'recommendation_id': '6_phase=3', + 'efficiency': 13188.996000000001, 'co2_equivalent_savings': 0.4999999999999998, + 'heat_demand': 53.20000000000002, 'kwh_savings': 801.5000000000005, + 'energy_cost_savings': 327.31316785714296 + } + + funding = Funding( + project_scores_matrix=mock_project_scores_matrix, + partial_project_scores_matrix=mock_partial_scores_matrix, + whlg_eligible_postcodes=mock_whlg_postcodes, + eco4_social_cavity_abs_rate=13.5, + 
eco4_social_solid_abs_rate=17, + eco4_private_cavity_abs_rate=13.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=22, + gbis_private_solid_abs_rate=28, + tenure="Private", + ) + + ( + pps, ppf, iu, ups + ) = funding.get_innovation_uplift( + measure=r, + starting_sap=62, + floor_area=69, + is_cavity=True, + current_wall_uvalue=0.7, + is_partial=False, + existing_li_thickness=200, + mainheating={ + 'original_description': 'Ground source heat pump, radiators, electric', + 'clean_description': 'Ground source heat pump, radiators, electric', 'has_radiators': True, + 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': True, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, + 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, + 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_hot-water-only': False, + 'has_electric': True, 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, + 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, + 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_mineral_and_wood': False, + 'has_dual_fuel_appliance': False, 'has_assumed': False, 'has_electricaire': False, + 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False + }, + main_fuel={ + 'original_description': 'electricity (not community)', + 'clean_description': 
'Electricity not community', 'fuel_type': 'electricity', 'tariff_type': None, + 'is_community': False, 'no_individual_heating_or_community_network': False, + 'complex_fuel_type': None + }, + mainheat_energy_eff="Poor", + ) + + # All should be zero + assert pps == 0 + assert ppf == 0 + assert iu == 0 + assert ups == 0 diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 73edff53..41785104 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -103,6 +103,7 @@ class HeatingRecommender: self.property.main_heating["has_electric"] or self.property.main_heating["has_electricaire"] ) self.has_ashp = self.property.main_heating["has_air_source_heat_pump"] + self.has_gshp = self.property.main_heating["has_ground_source_heat_pump"] self.has_room_heaters = ( self.property.main_heating["has_room_heaters"] or self.property.main_heating["has_portable_electric_heaters"] @@ -151,8 +152,10 @@ class HeatingRecommender: "underfloor heating" not in self.property.main_heating["clean_description"] ) + # If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH + return ( - hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and + hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and not self.has_gshp and ("high_heat_retention_storage_heater" in measures) ) @@ -345,7 +348,7 @@ class HeatingRecommender: if ( self.property.is_ashp_valid(measures=measures) and non_invasive_ashp_recommendation["suitable"] and - not self.has_ashp + not self.has_ashp and not self.has_gshp ): self.recommend_air_source_heat_pump( phase=phase, From 2f3d49dff436a8ef87a87d3403111efe3e85b0a5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 11:13:27 +0000 Subject: [PATCH 023/202] fixed issue with existing gshp and added test --- recommendations/optimiser/CostOptimiser.py | 3 ++- 
recommendations/optimiser/GainOptimiser.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index 50f4b884..b01d28b3 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -109,7 +109,8 @@ class CostOptimiser: self.m.optimize() if self.m.status == OptimizationStatus.INFEASIBLE: - logger.info("We have an infeasible model, setting up slack model") + # Turn off logging - too noisy + # logger.info("We have an infeasible model, setting up slack model") self.setup_slack() self.m.optimize() diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 7b2e56d2..6b757bf1 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -133,7 +133,8 @@ class GainOptimiser: (self.m.status == OptimizationStatus.OPTIMAL) and not len(solution) ): if self.allow_slack: - logger.info("We have an infeasible model, setting up slack model") + # Turn off logging - too noisy + # logger.info("We have an infeasible model, setting up slack model") self.setup_slack() self.m.optimize() solution = [ From 27de54adefd02196233ea722e6f6e0513fb0ae87 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 15:15:07 +0000 Subject: [PATCH 024/202] increase concurrency, handle error case for gain equal to fixed gain --- recommendations/optimiser/funding_optimiser.py | 2 +- serverless.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 8fbb13b2..4da08587 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -338,7 +338,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin if fixed_gain > target_gain: picked, sub_cost, 
sub_gain = ([], 0.0, 0.0) - elif fixed_gain < target_gain and not sub_measures: + elif fixed_gain <= target_gain and not sub_measures: picked, sub_cost, sub_gain = ([], 0.0, 0.0) else: picked, sub_cost, sub_gain = run_optimizer( diff --git a/serverless.yml b/serverless.yml index c1fc0b09..6eea03eb 100644 --- a/serverless.yml +++ b/serverless.yml @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 2 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 5 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: From 9c5d68f55f2a102b7a854af11cbc4c3b3a9985c5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 20:26:45 +0000 Subject: [PATCH 025/202] improving valuations scraper --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/app.py | 12 +- etl/webscrape/Zoopla.py | 145 ++++++++++-------- .../optimiser/funding_optimiser.py | 5 +- 5 files changed, 92 insertions(+), 74 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/app.py b/asset_list/app.py index bb5cb427..b832a3e8 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -298,13 +298,13 @@ def app(): landlord_block_reference = None # Project from Nick - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/Sep2025 Project" - data_filename = "AL Test.xlsx" - sheet_name = "Sheet1" - postcode_column = 'postcode' + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio" + data_filename = "22.10 AL Portfolio.xlsx" + sheet_name = "22.10 AL Portfolio" + postcode_column = 
'Postcode' address1_column = None address1_method = 'house_number_extraction' - fulladdress_column = "address" + fulladdress_column = "Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None @@ -315,7 +315,7 @@ def app(): landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "row_id" + landlord_property_id = "Row ID" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py index 7b3fd5b6..2c446dc8 100644 --- a/etl/webscrape/Zoopla.py +++ b/etl/webscrape/Zoopla.py @@ -3,109 +3,126 @@ import pandas as pd import time from stealth_requests import StealthSession import random +import os from multiprocessing import Pool from tqdm import tqdm ENGINES = ["safari", "chrome"] +CACHE_DIR = "zoopla_cache" +os.makedirs(CACHE_DIR, exist_ok=True) + + +def random_delay(): + """Pause randomly between requests (0.5–2 s).""" + time.sleep(random.uniform(0.5, 2)) def scrape_all_estimates(session, url): - # Rotate impersonation per request - resp = session.get(url, impersonate=ENGINES[random.randint(0, 1)]) + """Scrape valuation estimates for one Zoopla property URL.""" + resp = session.get(url, impersonate=random.choice(ENGINES)) page_source = BeautifulSoup(resp.text, "html.parser") estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) is_blocked = len(estimates) == 0 - return estimates, is_blocked + return estimates, is_blocked, resp.text + + +def extract_estimates(estimates): + """Extract low, mid, and high estimates from parsed HTML.""" + est = estimates[0] + low = est.find("span", {"data-testid": "low-estimate-blurred"}).text + mid = est.find("p", {"data-testid": "estimate-blurred"}).text + high = est.find("span", {"data-testid": "high-estimate-blurred"}).text + return low, mid, high + + +def cache_path_for_url(url): + """Return a deterministic local cache path for a URL.""" + uprn = 
url.split("/")[-2] + return os.path.join(CACHE_DIR, f"{uprn}.html") def parallel_task(url): - # No impersonate argument here + """Main worker function executed in each process.""" + cache_path = cache_path_for_url(url) + + # Use cached file if it exists + if os.path.exists(cache_path): + html = open(cache_path, "r").read() + page_source = BeautifulSoup(html, "html.parser") + estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) + if estimates: + low, mid, high = extract_estimates(estimates) + return {"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high} + + # Otherwise scrape live with StealthSession() as session: - estimates, is_blocked = scrape_all_estimates(session, url) + attempts = 0 + while attempts < 5: + estimates, is_blocked, html = scrape_all_estimates(session, url) + if not is_blocked and estimates: + open(cache_path, "w").write(html) + low, mid, high = extract_estimates(estimates) + return {"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high} + attempts += 1 + print(f"[Attempt {attempts}] Blocked or empty for {url}") + random_delay() - while is_blocked: - print(f"Blocked by Zoopla for URL: {url}") - time.sleep(random.uniform(0, 1)) - estimates, is_blocked = scrape_all_estimates(session, url) - - low_estimate = estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text - middle_estimate = estimates[0].find("p", {"data-testid": "estimate-blurred"}).text - high_estimate = estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text - - return { - "URL": url, - "Low Estimate": low_estimate, - "Middle Estimate": middle_estimate, - "High Estimate": high_estimate, - } + # If still blocked, return placeholders + return {"URL": url, "Low Estimate": None, "Middle Estimate": None, "High Estimate": None} def parse_price(p): + if p is None: + return None + p = p.replace("£", "").strip().lower() + if not p: + return None if p.endswith("k"): - return float(p[:-1]) * 1000 
+ return float(p[:-1]) * 1_000 elif p.endswith("m"): return float(p[:-1]) * 1_000_000 else: - return float(p) - - -# def parallel_task(url): -# with StealthSession(impersonate=ENGINES[random.randint(0, 1)]) as session: -# estimates, is_blocked = scrape_all_estimates(session, url) -# -# while is_blocked: -# # Will need to wait and retry if blocked by Zoopla -# print(f"Blocked by Zoopla for URL: {url}") -# sleep_factor = random.uniform(0, 1) # Random delay to avoid detection -# time.sleep(sleep_factor * 1) -# estimates, is_blocked = scrape_all_estimates(session, url) -# -# low_estimate = ( -# estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text -# ) # Find all span elements with data-testid="low-estimate" -# middle_estimate = ( -# estimates[0].find("p", {"data-testid": "estimate-blurred"}).text -# ) # Find all span elements with data-testid="middle-estimate" -# high_estimate = ( -# estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text -# ) # Find all span elements with data-testid="high-estimate-blurred" -# -# return { -# "URL": url, -# "Low Estimate": low_estimate, -# "Middle Estimate": middle_estimate, -# "High Estimate": high_estimate, -# } + try: + return float(p.replace(",", "")) + except ValueError: + return None if __name__ == "__main__": - # Get a SAL + # Load portfolio asset_list = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box/Property Box Finance Portfolio - " - "Standardised.xlsx", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio/22.10 AL Portfolio - " + "Standardised - partial UPRN fill.xlsx", sheet_name="Standardised Asset List" ) + asset_list = asset_list[~pd.isnull(asset_list["epc_os_uprn"])] asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str) uprns = asset_list["epc_os_uprn"].tolist() urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns] - with Pool(processes=5) as pool: + # Limit 
concurrency to avoid blocks + with Pool(processes=2) as pool: # fewer processes = fewer fingerprints estimates_list = list( - tqdm( - pool.imap(parallel_task, urls), - total=len(urls), - ) + tqdm(pool.imap(parallel_task, urls), total=len(urls)) ) df = pd.DataFrame(estimates_list) - # Extract UPRN from URL df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/") df["valuation"] = df["Middle Estimate"].apply(parse_price) + df.to_csv("zoopla_estimates.csv", index=False) - df["uprn"] = df["uprn"].astype(int).astype(str) - - asset_list.merge(df[["uprn", "valuation"]], left_on="epc_os_uprn", right_on="uprn", how="left").to_excel( - "Property Box Finance Portfolio - Standardised - with valuations.xlsx", index=False + # Merge with asset list + merged = asset_list.merge( + df[["uprn", "valuation"]], + left_on="epc_os_uprn", + right_on="uprn", + how="left" ) + merged.to_excel( + "20251029 AL Portfolio - Standardised - with valuations.xlsx", + index=False + ) + + print("Done. Results saved.") diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 4da08587..5acdd5fd 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -427,8 +427,9 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin solutions["meets_upgrade_target"] = solutions["total_gain"] >= target_gain - 0.1 # If we have packages that are fundable, but do not meet the upgrade target, we can run a final optimisation pass - if not solutions[solutions["is_eligible"] & ~solutions["meets_upgrade_target"]].empty: - logger.info("We have some packages that are fundable but do not meet the target gain") + # Turned off logging - too noisy + # if not solutions[solutions["is_eligible"] & ~solutions["meets_upgrade_target"]].empty: + # logger.info("We have some packages that are fundable but do not meet the target gain") # We now can calculate the project ABS, which subtracts from the cost, 
but this is only relevant for ECO4 solutions["starting_sap"] = p.data["current-energy-efficiency"] From 23eb26527c30709ad1f552a989b1a6748e7f2d79 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 30 Oct 2025 18:41:55 +0000 Subject: [PATCH 026/202] got backend working with eco plan data for one property --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/Property.py | 6 +- .../app/db/functions/inspections_functions.py | 214 ++++++++++++++++++ .../app/db/functions/property_functions.py | 30 ++- backend/app/db/models/inspections.py | 123 +++++++++- backend/app/db/models/portfolio.py | 1 + backend/app/db/models/recommendations.py | 18 ++ backend/app/plan/data_classes.py | 10 + backend/app/plan/schemas.py | 4 +- backend/app/plan/utils.py | 173 +++++++++++++- backend/engine/engine.py | 194 +++++----------- etl/epc/Record.py | 2 +- .../optimiser/funding_optimiser.py | 6 +- .../optimiser/optimiser_functions.py | 12 +- 15 files changed, 638 insertions(+), 159 deletions(-) create mode 100644 backend/app/db/functions/inspections_functions.py create mode 100644 backend/app/plan/data_classes.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/Property.py b/backend/Property.py index bd968e9f..f320f066 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -65,6 +65,7 @@ class Property: # Surplus information, that can be provided as optional inputs, by a customer n_bathrooms = None n_bedrooms = None + landlord_property_id = None # unique reference for the property as recognised by the landlord building_id = None # Used to group properties together into a single building # Contains the solar panel optimisation results from the Google Solar API @@ -265,8 +266,9 @@ class Property: 
"number_of_floors": number_of_floors, "insulation_floor_area": insulation_floor_area, "insulation_wall_area": insulation_wall_area, - "building_id": kwargs.get("building_id", None), - "floor_area": floor_area + "building_id": kwargs.get("building_id", kwargs.get("landlord_block_reference", None)), + "floor_area": floor_area, + "landlord_property_id": kwargs.get("landlord_property_id"), } def parse_kwargs(self, kwargs): diff --git a/backend/app/db/functions/inspections_functions.py b/backend/app/db/functions/inspections_functions.py new file mode 100644 index 00000000..d66154cb --- /dev/null +++ b/backend/app/db/functions/inspections_functions.py @@ -0,0 +1,214 @@ +import re +from dataclasses import dataclass, asdict +from typing import Optional, Dict, Any, Type, TypeVar +from sqlalchemy.orm import Session +from datetime import timezone + +from enum import Enum +from datetime import datetime, timedelta +import math +import pytz +import enum + +from backend.app.db.models.inspections import ( + InspectionModel, + InspectionArchetype, + InspectionArchetype2, + InspectionsWallConstruction, + InspectionsWallInsulation, + InspectionsInsulationMaterial, + InspectionBorescoped, + InspectionsRoofOrientation, + InspectionsTileHung, + InspectionsRendered, + InspectionsCladding, + InspectionsAccessIssues, +) +from sqlalchemy.dialects.postgresql import insert + +NON_INTRUSIVE_PREFIX = "non-intrusives:" + + +@dataclass +class InspectionData: + archetype: Optional[InspectionArchetype] = None + archetype_2: Optional[InspectionArchetype2] = None + wall_construction: Optional[InspectionsWallConstruction] = None + insulation: Optional[InspectionsWallInsulation] = None + insulation_material: Optional[InspectionsInsulationMaterial] = None + borescoped: Optional[InspectionBorescoped] = None + roof_orientation: Optional[InspectionsRoofOrientation] = None + tile_hung: Optional[InspectionsTileHung] = None + rendered: Optional[InspectionsRendered] = None + cladding: 
Optional[InspectionsCladding] = None + access_issues: Optional[InspectionsAccessIssues] = None + date: Optional[datetime] = None # Reflects the date when the survey was actually conducted + notes: Optional[str] = None + surveyor_name: Optional[str] = None + + +def _clean_string(value: Any) -> Optional[str]: + """Normalize strings for enum matching, tolerant of NaN/None.""" + if value is None: + return None + if isinstance(value, float) and math.isnan(value): + return None + if not isinstance(value, str): + return None + + v = ( + value.strip() + .lower() + .replace("“", '"') + .replace("”", '"') + .replace("’", "'") + ) + return re.sub(r"\s+", " ", v) + + +E = TypeVar("E", bound=Enum) + + +def _match_enum(value: Any, enum_cls: Type[E]) -> Optional[E]: + """Case-insensitive fuzzy matching for enums, tolerant of NaN/None.""" + v = _clean_string(value) + if not v: + return None + + for e in enum_cls: + if v == e.value.lower(): + return e + + for e in enum_cls: + if v in e.value.lower() or e.value.lower() in v: + return e + + return None + + +def _lower_key_dict(d: dict) -> dict: + """Convert all keys to lowercase for case-insensitive lookup.""" + return {str(k).lower(): v for k, v in d.items() if isinstance(k, str)} + + +def extract_inspection_data(config: Dict[str, Any]) -> Optional[InspectionData]: + """Extract and map inspection data from a config row.""" + config_lower = _lower_key_dict(config) + + non_intrusive_fields = { + k: v for k, v in config_lower.items() + if k.startswith(NON_INTRUSIVE_PREFIX) + } + + if not non_intrusive_fields: + return None + + data = InspectionData() + + data.archetype = _match_enum( + config_lower.get("non-intrusives: archetype"), InspectionArchetype + ) + data.archetype_2 = _match_enum( + config_lower.get("non-intrusives: archetype 2"), InspectionArchetype2 + ) + data.wall_construction = _match_enum( + config_lower.get("non-intrusives: construction"), InspectionsWallConstruction + ) + data.insulation = _match_enum( + 
config_lower.get("non-intrusives: insulated"), InspectionsWallInsulation + ) + data.insulation_material = _match_enum( + config_lower.get("non-intrusives: material"), InspectionsInsulationMaterial + ) + data.borescoped = _match_enum( + config_lower.get("non-intrusives: boroscoped?"), InspectionBorescoped + ) + data.roof_orientation = _match_enum( + config_lower.get("non-intrusives: roof orientation"), InspectionsRoofOrientation + ) + data.tile_hung = _match_enum( + config_lower.get("non-intrusives: tile hung"), InspectionsTileHung + ) + data.rendered = _match_enum( + config_lower.get("non-intrusives: rendered"), InspectionsRendered + ) + data.cladding = _match_enum( + config_lower.get("non-intrusives: cladding"), InspectionsCladding + ) + data.access_issues = _match_enum( + config_lower.get("non-intrusives: access issues"), InspectionsAccessIssues + ) + + data.date = config_lower.get("non-intrusives: date") + data.notes = config_lower.get("non-intrusives: further surveyor notes") + # convert surveyor name to title case if present + data.surveyor_name = config_lower.get("non-intrusives: name of surveyor").title() if config_lower.get( + "non-intrusives: name of surveyor") else None + + return data + + +def bulk_upsert_inspections_pg(session: Session, inspections_map): + """ + Bulk insert/update inspection records: + - 'created_at' = actual survey date + - 'uploaded_at' = time of upload or update + - If an inspection exists for the same property on the same date → overwrite + - Otherwise → insert a new record + """ + + if not inspections_map: + return + + now = datetime.now(pytz.utc) + + for property_id, data in inspections_map.items(): + # Extract survey date from the data + record = asdict(data) + survey_date = getattr(data, "survey_date", None) or record.get("survey_date") + + if not survey_date: + continue # skip if no survey date available + + # Convert to UTC datetime if needed + if hasattr(survey_date, "to_pydatetime"): + survey_date = 
survey_date.to_pydatetime() + if survey_date.tzinfo is None: + survey_date = survey_date.replace(tzinfo=pytz.utc) + + record["property_id"] = property_id + record["created_at"] = survey_date + record["uploaded_at"] = now + + # Normalize enums and NaNs + for key, value in record.items(): + if isinstance(value, enum.Enum): + record[key] = value.value + elif isinstance(value, float) and math.isnan(value): + record[key] = None + + # Find existing inspection *for same property on same day* + start_of_day = survey_date.replace(hour=0, minute=0, second=0, microsecond=0) + end_of_day = start_of_day + timedelta(days=1) + + existing_inspection = ( + session.query(InspectionModel) + .filter( + InspectionModel.property_id == property_id, + InspectionModel.created_at >= start_of_day, + InspectionModel.created_at < end_of_day, + ) + .first() + ) + + if existing_inspection: + # Overwrite existing record (same survey day) + for field, value in record.items(): + setattr(existing_inspection, field, value) + existing_inspection.uploaded_at = now + else: + # Create new inspection for new day + new_inspection = InspectionModel(**record) + session.add(new_inspection) + + session.flush() diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py index b17d8e53..fc49d205 100644 --- a/backend/app/db/functions/property_functions.py +++ b/backend/app/db/functions/property_functions.py @@ -12,7 +12,7 @@ from sqlalchemy.orm.exc import NoResultFound def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str, - energy_assessment: dict) -> (int, bool): + energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool): """ This function will create a record for the property in the database if it does not exist. If it does exist, it will just update the updated_at field. 
@@ -20,6 +20,9 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode: :param portfolio_id: The ID of the portfolio the property belongs to :param address: The address of the property :param postcode: The postcode of the property + :param uprn: The UPRN of the property + :param energy_assessment: The energy assessment data for the property + :param landlord_property_id: The landlord property ID if available :return: The ID of the property and a boolean indicating whether it was created or not """ @@ -49,6 +52,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode: postcode=postcode, portfolio_id=portfolio_id, uprn=uprn, + landlord_property_id=landlord_property_id, creation_status=PropertyCreationStatus.LOADING, status=status, has_pre_condition_report=False, @@ -63,6 +67,30 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode: return new_property.id, True +def ensure_property_exists(session, body, epc_searcher, energy_assessment, landlord_property_id=None): + """ + Wrapper funtion which checks if a property is new and will return the roperty type if not + :param session: + :param body: + :param epc_searcher: + :param energy_assessment: + :param landlord_property_id: + :return: + """ + property_id, is_new = create_property( + session=session, + portfolio_id=body.portfolio_id, + address=epc_searcher.address_clean, + postcode=epc_searcher.postcode_clean, + uprn=epc_searcher.uprn, + energy_assessment=energy_assessment, + landlord_property_id=str(landlord_property_id) if landlord_property_id is not None else None + ) + if not is_new and not body.multi_plan: + return None, False + return property_id, is_new + + def create_property_targets( session: Session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None ): diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py index c9925a2a..473f8a02 100644 --- 
a/backend/app/db/models/inspections.py +++ b/backend/app/db/models/inspections.py @@ -10,6 +10,7 @@ from sqlalchemy import ( ForeignKey, ) from sqlalchemy.ext.declarative import declarative_base +from backend.app.db.models.portfolio import PropertyModel Base = declarative_base() @@ -138,19 +139,117 @@ class InspectionModel(Base): __tablename__ = "inspections" id = Column(BigInteger, primary_key=True, autoincrement=True) - property_id = Column(BigInteger, ForeignKey("property.id"), nullable=False) + property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) - archetype = Column(Enum(InspectionArchetype), nullable=True) - archetype_2 = Column(Enum(InspectionArchetype2), nullable=True) - wall_construction = Column(Enum(InspectionsWallConstruction), nullable=True) - insulation = Column(Enum(InspectionsWallInsulation), nullable=True) - insulation_material = Column(Enum(InspectionsInsulationMaterial), nullable=True) - borescoped = Column(Enum(InspectionBorescoped), nullable=True) - roof_orientation = Column(Enum(InspectionsRoofOrientation), nullable=True) - tile_hung = Column(Enum(InspectionsTileHung), nullable=True) - rendered = Column(Enum(InspectionsRendered), nullable=True) - cladding = Column(Enum(InspectionsCladding), nullable=True) - access_issues = Column(Enum(InspectionsAccessIssues), nullable=True) + archetype = Column( + Enum( + InspectionArchetype, + name="inspection_archetype", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + archetype_2 = Column( + Enum( + InspectionArchetype2, + name="inspection_archetype_2", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + wall_construction = Column( + Enum( + InspectionsWallConstruction, + name="inspections_wall_construction", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + insulation = Column( + Enum( + InspectionsWallInsulation, + 
name="inspections_wall_insulation", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + insulation_material = Column( + Enum( + InspectionsInsulationMaterial, + name="inspections_insulation_material", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + borescoped = Column( + Enum( + InspectionBorescoped, + name="inspection_borescoped", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + roof_orientation = Column( + Enum( + InspectionsRoofOrientation, + name="inspections_roof_orientation", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + tile_hung = Column( + Enum( + InspectionsTileHung, + name="inspections_tile_hung", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + rendered = Column( + Enum( + InspectionsRendered, + name="inspections_rendered", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + cladding = Column( + Enum( + InspectionsCladding, + name="inspections_cladding", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + + access_issues = Column( + Enum( + InspectionsAccessIssues, + name="inspections_access_issues", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) notes = Column(Text) surveyor_name = Column(Text) diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 5f51cf46..953e7b3d 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -86,6 +86,7 @@ class PropertyModel(Base): portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) uprn = Column(Integer) + landlord_property_id = Column(Text) 
building_reference_number = Column(Integer) status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) address = Column(Text) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index bd5c4e20..2b7bf7c7 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -4,6 +4,7 @@ from sqlalchemy.sql import func from backend.app.db.models.portfolio import Portfolio, PropertyModel from backend.app.db.models.materials import Material from datatypes.enums import QuantityUnits +import enum Base = declarative_base() @@ -47,6 +48,14 @@ class RecommendationMaterials(Base): estimated_cost = Column(Float, nullable=False) +class PlanTypeEnum(enum.Enum): + SOLAR_ECO4 = "solar_eco4" + SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4" + EMPTY_CAVITY_ECO = "empty_cavity_eco" + PARTIAL_CAVITY_ECO = "partial_cavity_eco" + EXTRACTION_ECO = "extraction_eco" + + class Plan(Base): __tablename__ = 'plan' @@ -60,6 +69,15 @@ class Plan(Base): valuation_increase_lower_bound = Column(Float) valuation_increase_upper_bound = Column(Float) valuation_increase_average = Column(Float) + plan_type = Column( + Enum( + PlanTypeEnum, + name="plan_type", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) class PlanRecommendations(Base): diff --git a/backend/app/plan/data_classes.py b/backend/app/plan/data_classes.py new file mode 100644 index 00000000..5314aab0 --- /dev/null +++ b/backend/app/plan/data_classes.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass +from typing import Any, Optional + + +@dataclass +class PropertyRequestData: + patch: dict + already_installed: dict + non_invasive_recommendations: dict + valuation: Optional[float] diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index feff11fd..6fac54ad 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -55,7 +55,7 @@ MEASURE_MAP 
= { VALID_GOALS = ["Increasing EPC", "Energy Savings", "Reducing CO2 emissions"] VALID_HOUSING_TYPES = ["Social", "Private"] -VALID_EVENT_TYPES = ["remote_assessment"] +VALID_EVENT_TYPES = ["remote_assessment", "eco_project"] # Define the validation function for inclusions/exclusions @@ -113,7 +113,7 @@ class PlanTriggerRequest(BaseModel): # When performing a remote assessment, if this has been set, it will allow the engine to # pull data from the find my epc website, to utilise as part of a remote assessment - event_type: Optional[Literal["remote_assessment"]] = None + event_type: Optional[Literal["remote_assessment", "eco_project"]] = None # If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing # scores to drop by a few points diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 34fb02e7..fe995935 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,7 +1,8 @@ -from utils.s3 import read_from_s3 - -from backend.app.config import get_settings import msgpack +from utils.s3 import read_from_s3 +from backend.app.config import get_settings +from backend.app.plan.data_classes import PropertyRequestData +from typing import Any def get_cleaned(): @@ -21,3 +22,169 @@ def get_cleaned(): cleaned = msgpack.unpackb(cleaned, raw=False) return cleaned + + +def patch_epc(patch, epc_records): + """ + This utility function is useful to patch the epc data if we have data from the customer + :return: + """ + + for patch_variable, patch_value in patch.items(): + + if patch_variable in ["address", "postcode"]: + continue + + if patch_value == "": + continue + if patch_variable in epc_records["original_epc"]: + epc_records["original_epc"][patch_variable] = patch_value + + return epc_records + + +def extract_property_request_data( + config, patches, already_installed, non_invasive_recommendations, valuation_data, uprn +): + patch_has_uprn = "uprn" in patches[0] if patches else True + if 
patch_has_uprn: + patch = next(( + x for x in patches if str(x["uprn"]) == str(config["uprn"]) + ), {}) + else: + patch = next(( + x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + + property_already_installed = next(( + x for x in already_installed if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + + # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN + # we need to check existence of uprn + has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False + if has_uprn: + has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None] + + if has_uprn: + property_non_invasive_recommendations = next(( + x for x in non_invasive_recommendations if + (str(x["uprn"]) == str(uprn)) + ), {}) + + # We patch the non-invasive recs that are ['cavity_extract_and_refill'] + else: + property_non_invasive_recommendations = next(( + x for x in non_invasive_recommendations if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + + if isinstance(property_non_invasive_recommendations.get("recommendations"), str): + property_non_invasive_recommendations["recommendations"] = ast.literal_eval( + property_non_invasive_recommendations["recommendations"] + ) + transformed = [] + for rec in property_non_invasive_recommendations["recommendations"]: + if isinstance(rec, str): + transformed.append({"type": rec, }) + else: + transformed.append(rec) + + property_non_invasive_recommendations["recommendations"] = transformed + + # Check if the valuation data has uprn + valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False + if valuation_has_uprn: + valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None] + + if valuation_has_uprn: + property_valuation = next(( + float(x["valuation"]) for x in valuation_data if + (str(x["uprn"]) == str(uprn)) + 
), None) + else: + property_valuation = next(( + float(x["valuation"]) for x in valuation_data if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), None) + + # Return data class to give a structured format + return PropertyRequestData( + patch=patch, + already_installed=property_already_installed, + non_invasive_recommendations=property_non_invasive_recommendations, + valuation=property_valuation + ) + + +def parse_eco_packages(config: dict[str, Any]) -> tuple[list[str], int, str] | tuple[None, None, None]: + solar_identification = config.get("solar_reason", None) + cavity_identification = config.get("cavity_reason", None) + if not solar_identification and not cavity_identification: + return None, None, None + + # We map the categories to the desired measures and upgrade targets + # We note that the categories are placeholder until we move the standardised asset list + + identification_map = { + "Solar Eligible": { + "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], + "target_sap": 86, # High B + "plan_type": "solar_eco4" + }, + "Solar Eligible, Solid Wall Uninsulated, EPC E or Below": { + "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], + "target_sap": 86, # High B + "plan_type": "solar_eco4" + }, + "Solar Eligible, Needs Heating Upgrade": { + "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heater"], + "target_sap": 86, # High B + "plan_type": "solar_hhrsh_eco4" + }, + "Non-Intrusive Data Shows Empty Cavity": { + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "empty_cavity_eco" + }, + 'Non-Intrusive Data Shows Empty Cavity, built after 2002': { + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "empty_cavity_eco" + }, + "EPC Shows Empty Cavity, inspections show retro drilled": { + # EPC Indicates it's empty, so we simulate a fill + "measures": 
["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "extraction_eco" + }, + "EPC Shows Empty Cavity, inspections show filled at build": { + # EPC Indicates it's empty, so we simulate a fill + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "extraction_eco" + }, + "EPC Shows Empty Cavity": { + # EPC Indicates it's empty, so we simulate a fill + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "empty_cavity_eco" + } + } + + # Always prioritise solar + if solar_identification: + _key = solar_identification.split(":")[0] + else: + _key = cavity_identification.split(":")[0] + + mapped = identification_map[_key] + return mapped["measures"], mapped["target_sap"], mapped["plan_type"] + + +def handle_error(session, msg, status=500): + # When the pipeline fails, handles error process + logger.error(msg, exc_info=True) + session.rollback() + return Response(status_code=status, content=msg) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f4152852..0cb9d860 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -17,8 +17,8 @@ from backend.app.db.connection import db_engine from backend.app.db.functions.materials_functions import get_materials from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations from backend.app.db.functions.property_functions import ( - create_property, create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details + create_property_details_epc, create_property_targets, update_property_data, + update_or_create_property_spatial_details, ensure_property_exists ) from backend.app.db.functions.recommendations_functions import ( create_plan, upload_recommendations, create_scenario @@ -27,9 +27,14 @@ from backend.app.db.functions.funding_functions import upload_funding 
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -from backend.app.plan.utils import get_cleaned +from backend.app.plan.utils import ( + get_cleaned, patch_epc, extract_property_request_data, parse_eco_packages, handle_error +) from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions +from backend.app.db.functions.inspections_functions import ( + extract_inspection_data, bulk_upsert_inspections_pg +) from backend.ml_models.api import ModelApi from backend.Property import Property @@ -57,25 +62,6 @@ BATCH_SIZE = 5 SCORING_BATCH_SIZE = 100 -def patch_epc(patch, epc_records): - """ - This utility function is useful to patch the epc data if we have data from the customer - :return: - """ - - for patch_variable, patch_value in patch.items(): - - if patch_variable in ["address", "postcode"]: - continue - - if patch_value == "": - continue - if patch_variable in epc_records["original_epc"]: - epc_records["original_epc"][patch_variable] = patch_value - - return epc_records - - def extract_portfolio_aggregation_data( input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges ): @@ -349,75 +335,6 @@ def get_request_property_data(body: PlanTriggerRequest): return patches, already_installed, non_invasive_recommendations, valuation_data -def extract_property_request_data( - config, patches, already_installed, non_invasive_recommendations, valuation_data, uprn -): - patch_has_uprn = "uprn" in patches[0] if patches else True - if patch_has_uprn: - patch = next(( - x for x in patches if str(x["uprn"]) == str(config["uprn"]) - ), {}) - else: - patch = next(( - x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - property_already_installed 
= next(( - x for x in already_installed if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN - # we need to check existence of uprn - has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False - if has_uprn: - has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None] - - if has_uprn: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (str(x["uprn"]) == str(uprn)) - ), {}) - - # We patch the non-invasive recs that are ['cavity_extract_and_refill'] - else: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - if isinstance(property_non_invasive_recommendations.get("recommendations"), str): - property_non_invasive_recommendations["recommendations"] = ast.literal_eval( - property_non_invasive_recommendations["recommendations"] - ) - transformed = [] - for rec in property_non_invasive_recommendations["recommendations"]: - if isinstance(rec, str): - transformed.append({"type": rec, }) - else: - transformed.append(rec) - - property_non_invasive_recommendations["recommendations"] = transformed - - # Check if the valuation data has uprn - valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False - if valuation_has_uprn: - valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None] - - if valuation_has_uprn: - property_valution = next(( - float(x["valuation"]) for x in valuation_data if - (str(x["uprn"]) == str(uprn)) - ), None) - else: - property_valution = next(( - float(x["valuation"]) for x in valuation_data if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), None) - - return patch, property_already_installed, property_non_invasive_recommendations, 
property_valution - - def get_funding_data(): """ This function retrieves the eco project scores matrix and the warm homes local grant funding data @@ -564,7 +481,7 @@ async def model_engine(body: PlanTriggerRequest): bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) - input_properties = [] + input_properties, inspections_map, eco_packages = [], {}, {} for config in tqdm(plan_input): # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly @@ -601,15 +518,12 @@ async def model_engine(body: PlanTriggerRequest): # We check for an energy assessment we have performed on this property: energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn) - # Create a record in db - property_id, is_new = create_property( - session=session, - portfolio_id=body.portfolio_id, - address=epc_searcher.address_clean, - postcode=epc_searcher.postcode_clean, - uprn=epc_searcher.uprn, - energy_assessment=energy_assessment + property_id, is_new = ensure_property_exists( + session, body, epc_searcher, energy_assessment, landlord_property_id=config.get("landlord_property_id") ) + if not property_id: + continue + if not is_new and not body.multi_plan: continue @@ -636,16 +550,17 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher, energy_assessment ) - patch, property_already_installed, property_non_invasive_recommendations, property_valuation = ( - extract_property_request_data( - config=config, - patches=patches, - already_installed=already_installed, - non_invasive_recommendations=non_invasive_recommendations, - valuation_data=valuation_data, - uprn=epc_searcher.uprn, - ) + req_data = extract_property_request_data( + config=config, + patches=patches, + already_installed=already_installed, + non_invasive_recommendations=non_invasive_recommendations, + valuation_data=valuation_data, + uprn=epc_searcher.uprn, ) + # Pull this out as it may get 
overwritten + property_non_invasive_recommendations = req_data.non_invasive_recommendations + patch = req_data.patch # if we have a remote assment data type, we pull the additional data and include it if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): @@ -679,17 +594,31 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=property_already_installed, - property_valuation=property_valuation, + already_installed=req_data.already_installed, + property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, **Property.extract_kwargs(config), # TODO: Depraecate this ) ) + # If we have an ECO project, we parse the cavity/solar reasons + eco_packages[property_id] = parse_eco_packages(config) + + # Final step - extract inspections data, if we have it + property_inspections = extract_inspection_data(config) + if property_inspections: + inspections_map[property_id] = property_inspections + if not input_properties: return Response(status_code=204) + # We check if we have inspections data and store it in the database if so. 
We'll update or create + # aginst each property if + if inspections_map: + logger.info("Inserting inspections data") + bulk_upsert_inspections_pg(session, inspections_map) + # Set up model api and warm up the lambdas model_api = ModelApi( portfolio_id=body.portfolio_id, @@ -766,11 +695,20 @@ async def model_engine(body: PlanTriggerRequest): recommendations_scoring_data = [] representative_recommendations = {} for p in tqdm(input_properties): + # We set the ECO package data, if we have it + property_eco_package = eco_packages.get(p.id, (None, None, None)) + if property_eco_package[0] is not None: + inclusions = property_eco_package[0] + exclusions = [] + else: + inclusions = body.inclusions + exclusions = body.exclusions + recommender = Recommendations( property_instance=p, materials=materials, - exclusions=body.exclusions, - inclusions=body.inclusions, + exclusions=exclusions, + inclusions=inclusions, default_u_values=body.default_u_values ) property_recommendations, property_representative_recommendations = recommender.recommend() @@ -788,7 +726,6 @@ async def model_engine(body: PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) - # TODO: Make sure that number_habitable_rooms has been dropped logger.info("Preparing data for scoring in sap change api") recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) @@ -878,16 +815,16 @@ async def model_engine(body: PlanTriggerRequest): fixed_gain = optimiser_functions.calculate_fixed_gain( property_required_measures, recommendations, p, needs_ventilation ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) + gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) funding = Funding( tenure=body.housing_type, project_scores_matrix=project_scores_matrix, partial_project_scores_matrix=partial_project_scores_matrix, whlg_eligible_postcodes=whlg_eligible_postcodes, - 
eco4_social_cavity_abs_rate=12.5, + eco4_social_cavity_abs_rate=13, eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=12.5, + eco4_private_cavity_abs_rate=13, eco4_private_solid_abs_rate=17, gbis_social_cavity_abs_rate=21, gbis_social_solid_abs_rate=25, @@ -1025,8 +962,8 @@ async def model_engine(body: PlanTriggerRequest): funding.check_funding( measures=solution, - starting_sap=p.data["current-energy-efficiency"], - ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), + starting_sap=int(p.data["current-energy-efficiency"]), + ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]), floor_area=p.floor_area, mainheat_description=p.main_heating["clean_description"], heating_control_description=p.main_heating_controls["clean_description"], @@ -1193,6 +1130,7 @@ async def model_engine(body: PlanTriggerRequest): "valuation_increase_average": ( valuations["average_increased_value"] - valuations["current_value"] ), + "plan_type": eco_packages.get(p.id, (None, None, None))[2] }) upload_recommendations( @@ -1212,7 +1150,7 @@ async def model_engine(body: PlanTriggerRequest): except Exception as e: # Rollback the session if an error occurs session.rollback() - print("Failed i = %s" % str(i)) + logger.warning("Failed i = %s" % str(i)) logger.error(f"An error occurred during batch starting at index {i}: {e}") logger.error(f"property is uprn {p.uprn} id {p.id} address {p.address}") @@ -1251,21 +1189,13 @@ async def model_engine(body: PlanTriggerRequest): session.commit() except IntegrityError: - logger.error("Database integrity error occurred", exc_info=True) - session.rollback() - return Response(status_code=500, content="Database integrity error.") + return handle_error(session, "Database integrity error.", 500) except OperationalError: - logger.error("Database operational error occurred", exc_info=True) - session.rollback() - return Response(status_code=500, content="Database operational error.") + 
return handle_error(session, "Database operational error.", 500) except ValueError: - logger.error("Value error - possibly due to malformed data", exc_info=True) - session.rollback() - return Response(status_code=400, content="Bad request: malformed data.") + return handle_error(session, "Bad request: malformed data.", 400) except Exception as e: # General exception handling - logger.error(f"An error occurred: {e}") - session.rollback() - return Response(status_code=500, content="An unexpected error occurred.") + return handle_error(session, "An unexpected error occurred.", 500) finally: session.close() diff --git a/etl/epc/Record.py b/etl/epc/Record.py index d0816034..b1b8d975 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -380,7 +380,7 @@ class EPCRecord: df.columns = [x.upper().replace("-", "_") for x in df.columns] if replace_empty_string: - df = df.replace("", np.nan) + df = df.replace("", np.nan).infer_objects(copy=False) return df diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 5acdd5fd..5e945b56 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -416,7 +416,9 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin "total_gain": total_gain, "path": path_spec, "scheme": scheme, - "is_eligible": _is_eligible_funding_package(scheme, p.data["current-energy-efficiency"], total_gain), + "is_eligible": _is_eligible_funding_package( + scheme, int(p.data["current-energy-efficiency"]), total_gain + ), "unfunded_items": unfunded_picked, }) @@ -432,7 +434,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # logger.info("We have some packages that are fundable but do not meet the target gain") # We now can calculate the project ABS, which subtracts from the cost, but this is only relevant for ECO4 - solutions["starting_sap"] = p.data["current-energy-efficiency"] + 
solutions["starting_sap"] = int(p.data["current-energy-efficiency"]) solutions["floor_area"] = p.floor_area solutions["ending_sap"] = solutions["starting_sap"] + solutions["total_gain"] solutions["starting_band"] = solutions["starting_sap"].apply(funding.get_sap_band) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 98725138..3a839dff 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -176,7 +176,8 @@ def calculate_fixed_gain(property_required_measures, recommendations, p, needs_v return fixed_gain -def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float) -> float | None: +def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, + eco_packages: None | dict = None) -> float | None: """ Calculates the target gain value for optimisation based on the goal. @@ -193,6 +194,7 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float) -> Property object with EPC data (must have p.data["current-energy-efficiency"]). fixed_gain : float Total fixed gain from required measures (returned by calculate_fixed_gain). 
+ eco_packages : dict, optional Returns ------- @@ -201,8 +203,14 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float) -> """ if body.goal == "Increasing EPC": current_sap = int(p.data["current-energy-efficiency"]) + + target_sap = ( + eco_packages.get(p.id)[1] if eco_packages.get(p.id)[1] is not None + else epc_to_sap_lower_bound(body.goal_value) + ) + gain = CostOptimiser.calculate_sap_gain_with_slack( - epc_to_sap_lower_bound(body.goal_value) - current_sap + target_sap - current_sap ) - fixed_gain if body.simulate_sap_10: gain += 3 From 92fcbe8cdb86b58be282cb0c97ae8bb183307954 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sat, 1 Nov 2025 15:34:45 +0000 Subject: [PATCH 027/202] amend etl code for new october data --- etl/epc/DataProcessor.py | 11 +- etl/epc/Pipeline.py | 10 ++ etl/epc/property_change_app.py | 2 +- .../epc_attributes/FloorAttributes.py | 59 ++++--- .../epc_attributes/RoofAttributes.py | 89 ++++++++--- .../epc_attributes/WallAttributes.py | 146 ++++++++++-------- recommendations/rdsap_tables.py | 102 ++++++++++-- recommendations/recommendation_utils.py | 4 +- 8 files changed, 295 insertions(+), 128 deletions(-) diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 41dca943..682e9e78 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -21,7 +21,7 @@ from etl.epc.settings import ( ENDING_SUFFIX_COMPONENT_COLS, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, - DATA_ANOMALY_MATCHES + DATA_ANOMALY_MATCHES, ) from recommendations.rdsap_tables import FLOOR_LEVEL_MAP @@ -249,7 +249,8 @@ class EPCDataProcessor: # Map all anomaly values to None data_anomaly_map = dict( zip( - DATA_ANOMALY_MATCHES, [None] * len(DATA_ANOMALY_MATCHES), + DATA_ANOMALY_MATCHES, + [None] * len(DATA_ANOMALY_MATCHES), ) ) @@ -749,6 +750,12 @@ class EPCDataProcessor: self.data = self.data[~pd.isnull(self.data["HOTWATER_DESCRIPTION"])] self.data = self.data[~pd.isnull(self.data["ROOF_DESCRIPTION"])] + # Remove any walls 
described as Basement walls since these are non-standard + # TODO: CHECK IF WE SHOULD MAP THESE U VALUES INSTEAD + index_to_remove = self.data["WALLS_DESCRIPTION"] == "Basement wall" + print(f"Removing {index_to_remove.sum()} records with basement walls") + self.data = self.data[~index_to_remove] + # Because park homes are surveyed unusually (for example, we don't have u-values to # look up for their different components, they need to be collected in survey and aren't reflected in # EPCs) we'll ignore them from the model diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index c03abfcf..9f427c59 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -112,6 +112,16 @@ clean_lookup["mainheatcont-description"] = new_mainheatcont_mapping.to_dict( orient="records" ) +# TEMP FIX - GRANITE OR WHINSTONE BOOLEAN ISSUE +new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"]) +new_walls_description_mapping.loc[ + new_walls_description_mapping["original_description"].str.contains("Granite"), + "is_granite_or_whinstone", +] = True +clean_lookup["walls-description"] = new_walls_description_mapping.to_dict( + orient="records" +) + class EPCPipeline: """ diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py index c985567d..cdb7cfb8 100644 --- a/etl/epc/property_change_app.py +++ b/etl/epc/property_change_app.py @@ -12,7 +12,7 @@ def main(): """ directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] - # directories = directories[0:3] + # directories = directories[235:275] epc_pipeline = EPCPipeline( directories=directories, diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 6def93f0..23c7dd8e 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -1,17 +1,26 @@ import re from typing import Dict, Union from BaseUtility import Definitions -from 
etl.epc_clean.epc_attributes.attribute_utils import extract_thermal_transmittance, extract_component_types +from etl.epc_clean.epc_attributes.attribute_utils import ( + extract_thermal_transmittance, + extract_component_types, +) class FloorAttributes(Definitions): DWELLING_BELOW = ["another dwelling below", "other premises below"] - FLOOR_TYPES = ["assumed", "to unheated space", "to external air", "suspended", "solid"] + FLOOR_TYPES = [ + "assumed", + "to unheated space", + "to external air", + "suspended", + "solid", + ] # For the short term, while we are still exploring the data, we maintain a list of error cases which # we want to ignore and consider as no data. - OBSERVED_ERRORS = ["Conservatory", "insulated"] + OBSERVED_ERRORS = ["Conservatory", "insulated", "Basement"] WELSH_TEXT = { "(anheddiad arall islaw)": "(another dwelling below)", @@ -35,32 +44,40 @@ class FloorAttributes(Definitions): "i ofod heb ei wresogi, heb ei inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)", "i ofod heb ei wresogi, dim inswleiddio": "to unheated space, no insulation", "igçör awyr y tu allan, wedigçöi inswleiddio (rhagdybiaeth)": "to external air, insulated (assumed)", - "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)" + "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)", } def __init__(self, description: str): self.description: str = description.lower() - self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or ( - description in self.OBSERVED_ERRORS) or (self.description == "sap05:floor") + self.nodata = ( + (not description) + or (description in self.DATA_ANOMALY_MATCHES) + or (description in self.OBSERVED_ERRORS) + or (self.description == "sap05:floor") + ) # Try and perform a translation, incase it's in welsh self.translate_welsh_text() if not self.nodata and not any( - rt in self.description for rt in - self.FLOOR_TYPES + self.DWELLING_BELOW + 
["average thermal transmittance"] + rt in self.description + for rt in self.FLOOR_TYPES + + self.DWELLING_BELOW + + ["average thermal transmittance"] ): - raise ValueError('Invalid description') + raise ValueError("Invalid description") def translate_welsh_text(self): uvalue_match = re.search( - r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k', self.description + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", + self.description, ) uvalue_match2 = re.search( - r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k', self.description + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k", + self.description, ) # Step 2: Generalized translation with placeholder @@ -69,7 +86,7 @@ class FloorAttributes(Definitions): uvalue = uvalue_match.group(1) else: uvalue = uvalue_match2.group(1) - self.description = f'average thermal transmittance {uvalue} w/m-¦k' + self.description = f"average thermal transmittance {uvalue} w/m-¦k" else: translation = self.WELSH_TEXT.get(self.description) @@ -89,11 +106,15 @@ class FloorAttributes(Definitions): result, description = extract_thermal_transmittance(result, description) # floor type - result, description = extract_component_types(result, description, list_of_components=self.FLOOR_TYPES) + result, description = extract_component_types( + result, description, list_of_components=self.FLOOR_TYPES + ) # check if there is another dwelling below - result['another_property_below'] = "(another dwelling below)" in description or "(other premises below)" in \ - description + result["another_property_below"] = ( + "(another dwelling below)" in description + or "(other premises below)" in description + ) thickness_map = { "external insulation": "average", @@ -102,17 +123,17 @@ class FloorAttributes(Definitions): "partial insulation": "below average", "no insulation": "none", "additional insulation": "above average", - "insulated": "average" + "insulated": "average", } for key, value in thickness_map.items(): if 
key in description: - result['insulation_thickness'] = value + result["insulation_thickness"] = value break else: - result['insulation_thickness'] = None + result["insulation_thickness"] = None if result["another_property_below"]: result["thermal_transmittance"] = 0 - result["thermal_transmittance_unit"] = 'w/m-¦k' + result["thermal_transmittance_unit"] = "w/m-¦k" return result diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index 2eacc951..153fb548 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -1,12 +1,28 @@ import re from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance +from etl.epc_clean.epc_attributes.attribute_utils import ( + extract_component_types, + extract_thermal_transmittance, +) class RoofAttributes(Definitions): - ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed'] - DWELLING_ABOVE = ["another dwelling above", "other premises above", "other dwelling above"] + ROOF_TYPES = [ + "pitched", + "roof room", + "loft", + "flat", + "thatched", + "at rafters", + "assumed", + ] + DWELLING_ABOVE = [ + "another dwelling above", + "other premises above", + "other dwelling above", + "(same dwelling above)", + ] WELSH_TEXT = { "ar oleddf, dim inswleiddio": "pitched, no insulation", @@ -18,10 +34,10 @@ class RoofAttributes(Definitions): "ar oleddf, wedi?i inswleiddio": "pitched, insulated", "ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)": "pitched, limited insulation (assumed)", "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation", - "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": 'pitched, insulated at rafters', - "ar oleddf, wedi?i inswleiddio wrth y trawstiau": 'pitched, insulated at rafters', - "ar oleddf, wedi?i inswleiddio wrth y trawstia": 'pitched, 
insulated at rafters', - "ar oleddf, wedigçöi inswleiddio wrth y trawstia": 'pitched, insulated at rafters', + "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": "pitched, insulated at rafters", + "ar oleddf, wedi?i inswleiddio wrth y trawstiau": "pitched, insulated at rafters", + "ar oleddf, wedi?i inswleiddio wrth y trawstia": "pitched, insulated at rafters", + "ar oleddf, wedigçöi inswleiddio wrth y trawstia": "pitched, insulated at rafters", "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)", "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation", "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)", @@ -43,9 +59,18 @@ class RoofAttributes(Definitions): } DEFAULT_KEYS = [ - 'thermal_transmittance', 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room', - 'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', 'has_dwelling_above', - 'is_valid', 'insulation_thickness' + "thermal_transmittance", + "thermal_transmittance_unit", + "is_pitched", + "is_roof_room", + "is_loft", + "is_flat", + "is_thatched", + "is_at_rafters", + "is_assumed", + "has_dwelling_above", + "is_valid", + "insulation_thickness", ] def __init__(self, description: str): @@ -54,14 +79,21 @@ class RoofAttributes(Definitions): """ self.description: str = description.lower().strip() - self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or self.description == "sap05:roof" + self.nodata = ( + not description + or description in self.DATA_ANOMALY_MATCHES + or self.description == "sap05:roof" + ) self.welsh_translation_search() if not self.nodata and not any( - rt in self.description for rt in self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"] + rt in self.description + for rt in self.ROOF_TYPES + + self.DWELLING_ABOVE + + ["average thermal transmittance"] ): - raise ValueError('Invalid description') + raise ValueError("Invalid description") def 
welsh_translation_search(self): """ @@ -76,7 +108,7 @@ class RoofAttributes(Definitions): r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof", r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof", r"ar oleddf, (\d+mm) o inswleiddio yn y llofft", - r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft" + r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft", ] li_thickness_match = None for regex in loft_insulation_regexes: @@ -84,9 +116,14 @@ class RoofAttributes(Definitions): if li_thickness_match: break - uvalue_search = re.search(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", self.description) + uvalue_search = re.search( + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", + self.description, + ) uvalue_search2 = re.search( - r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k', self.description, re.IGNORECASE + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k", + self.description, + re.IGNORECASE, ) # Step 2: Generalized translation with placeholder @@ -121,9 +158,13 @@ class RoofAttributes(Definitions): result, description = extract_thermal_transmittance(result, description) # roof type - result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES) + result, description = extract_component_types( + result, description, list_of_components=self.ROOF_TYPES + ) - result["has_dwelling_above"] = any([x in description for x in self.DWELLING_ABOVE]) + result["has_dwelling_above"] = any( + [x in description for x in self.DWELLING_ABOVE] + ) for dwelling_above in self.DWELLING_ABOVE: description = description.replace(dwelling_above, "") @@ -136,7 +177,7 @@ class RoofAttributes(Definitions): # Search for a regular expression that matches 150 insulation match = re.search(r"(\d+\+?)\s*insulation", description) if match: - result['insulation_thickness'] = match.group(1) + result["insulation_thickness"] = match.group(1) # insulation thickness thickness_map = { @@ -149,21 +190,21 @@ class 
RoofAttributes(Definitions): } for key, value in thickness_map.items(): if key in description: - result['insulation_thickness'] = value + result["insulation_thickness"] = value # Remove the match from the description # description = description.replace(key, "") break # Extract insulation thickness in mm, if present - match = re.search(r'(\d+\+?)\s*mm', description) + match = re.search(r"(\d+\+?)\s*mm", description) if match: - result['insulation_thickness'] = match.group(1) + result["insulation_thickness"] = match.group(1) if "insulation_thickness" not in result: - result['insulation_thickness'] = None + result["insulation_thickness"] = None if result["has_dwelling_above"]: result["thermal_transmittance"] = 0 - result["thermal_transmittance_unit"] = 'w/m-¦k' + result["thermal_transmittance_unit"] = "w/m-¦k" return result diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index 49252552..8cf32a0b 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -3,76 +3,78 @@ from typing import Dict, Union from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_component_types, - extract_thermal_transmittance + extract_thermal_transmittance, ) class WallAttributes(Definitions): - WALL_TYPES = ['cavity wall', 'filled cavity', 'solid brick', 'system built', 'timber frame', 'granite or whinstone', - 'as built', 'cob', 'assumed', 'sandstone or limestone', "park home"] + WALL_TYPES = [ + "cavity wall", + "filled cavity", + "solid brick", + "system built", + "timber frame", + "granite or whinstone", + "as built", + "cob", + "assumed", + "sandstone or limestone", + "park home", + ] WELSH_TEXT = { - "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": - "Solid brick, as built, no insulation (assumed)", - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': - 'Cavity wall, as 
built, partial insulation (assumed)', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol': - 'Cavity wall, as built, partial insulation', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Cavity wall, as built, no insulation (assumed)', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio': - 'Cavity wall, as built, no insulation', - 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Sandstone or limestone, as built, no insulation (assumed)', - 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio': - 'Sandstone or limestone, as built, no insulation', - 'Waliau ceudod, ceudod wediGÇÖi lenwi': 'Cavity wall, filled cavity', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': - 'Cavity wall, as built, insulated (assumed)', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': - 'Cavity wall, as built, insulated', - 'Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Granite or whinstone, as built, no insulation (assumed)', - 'Waliau ceudod,': 'Cavity wall, as built, no insulation', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': - 'Timber frame, as built, insulated (assumed)', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': - 'Timber frame, as built, insulated', - 'Gwenithfaen neu risgraig, gydag inswleiddio allanol': 'Granite or whinstone, with external insulation', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'System built, as built, no insulation (assumed)', - 'Tywodfaen, gydag inswleiddio mewnol': 'Sandstone or limestone, with internal insulation', - 'Waliau ceudod, ynysydd allanol a llenwi ceudod': 'Cavity wall, filled cavity and external insulation', - 'Gwenithfaen neu risgraig, gydag inswleiddio mewnol': 'Granite or whinstone, with internal insulation', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': - 'Timber 
frame, as built, partial insulation (assumed)', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': - 'System built, as built, insulated (assumed)', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': - 'System built, as built, insulated', - 'WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol': 'System built, with external insulation', - 'Briciau solet, gydag inswleiddio mewnol': 'Solid brick, with internal insulation', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': - 'System built, as built, partial insulation (assumed)', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol': - 'System built, as built, partial insulation', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Timber frame, as built, no insulation (assumed)', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio': - 'Timber frame, as built, no insulation', - 'Tywodfaen, gydag inswleiddio allanol': 'Sandstone or limestone, with external insulation', - 'Waliau ceudod, gydag inswleiddio allanol': 'Cavity wall, with external insulation', - 'Briciau solet, gydag inswleiddio allanol': 'Solid brick, with external insulation', + "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, partial insulation (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation", + "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, no insulation 
(assumed)", + "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation", + "Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated", + "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, as built, no insulation (assumed)", + "Waliau ceudod,": "Cavity wall, as built, no insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, insulated (assumed)", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated", + "Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, as built, no insulation (assumed)", + "Tywodfaen, gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation", + "Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation", + "Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, partial insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System built, as built, insulated (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, insulated", + "WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation", + "Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation", + 
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, as built, partial insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, partial insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation (assumed)", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation", + "Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation", + "Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation", + "Briciau solet, gydag inswleiddio allanol": "Solid brick, with external insulation", # Add in some corrections: - 'Co with external insulation': 'Cob, with external insulation', - 'Cowith external insulation': 'Cob, with external insulation', + "Co with external insulation": "Cob, with external insulation", + "Cowith external insulation": "Cob, with external insulation", } DEFAULT_KEYS = [ - 'thermal_transmittance', 'thermal_transmittance_unit', 'is_cavity_wall', 'is_filled_cavity', - 'is_solid_brick', 'is_system_built', 'is_timber_frame', 'is_granite_or_whinstone', - 'is_as_built', 'is_cob', 'is_assumed', 'is_sandstone_or_limestone', - 'insulation_thickness', 'external_insulation', 'internal_insulation' + "thermal_transmittance", + "thermal_transmittance_unit", + "is_cavity_wall", + "is_filled_cavity", + "is_solid_brick", + "is_system_built", + "is_timber_frame", + "is_granite_or_whinstone", + "is_as_built", + "is_cob", + "is_assumed", + "is_sandstone_or_limestone", + "insulation_thickness", + "external_insulation", + "internal_insulation", ] CORRECTIONS = { @@ -98,7 +100,9 @@ class WallAttributes(Definitions): :return: """ - uvalue_search = re.search(r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)", self.description) + uvalue_search = re.search( + r"Trawsyriannedd thermol cyfartalog 
(\d+\.?\d*)", self.description + ) if uvalue_search: uvalue = uvalue_search.group(1) @@ -123,7 +127,9 @@ class WallAttributes(Definitions): result, description = extract_thermal_transmittance(result, description) # wall type - result, description = extract_component_types(result, description, list_of_components=self.WALL_TYPES) + result, description = extract_component_types( + result, description, list_of_components=self.WALL_TYPES + ) # Handle some edge cases if "sandstone" in description and not result["is_sandstone_or_limestone"]: @@ -137,18 +143,18 @@ class WallAttributes(Definitions): "partial insulation": "below average", "no insulation": "none", "additional insulation": "above average", - "insulated": "average" + "insulated": "average", } for key, value in thickness_map.items(): if key in description: - result['insulation_thickness'] = value + result["insulation_thickness"] = value break else: - result['insulation_thickness'] = None + result["insulation_thickness"] = None # insulation type - result['external_insulation'] = 'external insulation' in description - result['internal_insulation'] = 'internal insulation' in description + result["external_insulation"] = "external insulation" in description + result["internal_insulation"] = "internal insulation" in description if result["is_filled_cavity"]: # If it has a filled cavity + internal/external insulation, it's deemed to have above average insulation @@ -159,7 +165,11 @@ class WallAttributes(Definitions): else: result["insulation_thickness"] = "average" - if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"): + if ( + result["is_cavity_wall"] + & result["is_as_built"] + & (result["insulation_thickness"] == "average") + ): result["is_filled_cavity"] = True return result diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py index 14c7f247..46e7d083 100644 --- a/recommendations/rdsap_tables.py +++ b/recommendations/rdsap_tables.py @@ -98,6 
+98,13 @@ age_band_data = [ "Northern_Ireland": "2014-2022", "Park_home_UK": None, }, + { + "age_band": "M", + "England_Wales": "2022 onwards", + "Scotland": "2024 onwards", + "Northern_Ireland": "2023 onwards", + "Park_home_UK": None, + }, ] england_wales_age_band_lookup = { @@ -123,6 +130,7 @@ default_wall_thickness = [ "J": 450, "K": 450, "L": 450, + "M": 450, }, { "type": "solid brick", @@ -138,6 +146,7 @@ default_wall_thickness = [ "J": 300, "K": 300, "L": 300, + "M": 300, }, { "type": "cavity", @@ -153,6 +162,7 @@ default_wall_thickness = [ "J": 300, "K": 300, "L": 300, + "M": 300, }, { "type": "timber frame", @@ -168,6 +178,7 @@ default_wall_thickness = [ "J": 300, "K": 300, "L": 300, + "M": 300, }, { "type": "cob", @@ -183,6 +194,7 @@ default_wall_thickness = [ "J": 590, "K": 590, "L": 590, + "M": 590, }, { "type": "system build", @@ -198,6 +210,7 @@ default_wall_thickness = [ "J": 300, "K": 300, "L": 300, + "M": 300, }, { "type": "park home", @@ -213,6 +226,7 @@ default_wall_thickness = [ "J": 100, "K": 100, "L": 100, + "M": 100, }, ] @@ -253,8 +267,36 @@ wall_types = [ ] u_values = [ - ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], + [ + "a", + "a", + "a", + "a", + "1.7b", + "1.0", + "0.6", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "a", + "a", + "a", + "a", + "1.7b", + "1.0", + "0.6", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], [ "1.7", "1.7", @@ -268,6 +310,7 @@ u_values = [ "0.35", "0.30", "0.28", + "0.26", ], [ "0.55", @@ -282,6 +325,7 @@ u_values = [ "0.25", "0.21", "0.21", + "0.20", ], [ "0.32", @@ -296,6 +340,7 @@ u_values = [ "0.19", "0.17", "0.16", + "0.15", ], [ "0.23", @@ -310,6 +355,7 @@ u_values = [ "0.15", "0.14", "0.14", + "0.13", ], [ "0.18", @@ -324,6 +370,7 @@ u_values = [ "0.13", "0.12", "0.12", + "0.11", ], [ "0.80", @@ -338,6 +385,7 @@ u_values = [ "0.35", "0.30", 
"0.28", + "0.26", ], [ "0.40", @@ -352,6 +400,7 @@ u_values = [ "0.25", "0.21", "0.21", + "0.20", ], [ "0.26", @@ -366,6 +415,7 @@ u_values = [ "0.19", "0.17", "0.16", + "0.15", ], [ "0.20", @@ -380,6 +430,7 @@ u_values = [ "0.15", "0.14", "0.14", + "0.13", ], [ "0.16", @@ -394,6 +445,7 @@ u_values = [ "0.13", "0.12", "0.12", + "0.11", ], [ "1.5", @@ -408,6 +460,7 @@ u_values = [ "0.35", "0.30", "0.28", + "0.26", ], [ "0.53", @@ -422,6 +475,7 @@ u_values = [ "0.25", "0.21", "0.21", + "0.20", ], [ "0.32", @@ -436,6 +490,7 @@ u_values = [ "0.19", "0.17", "0.16", + "0.15", ], [ "0.23", @@ -450,6 +505,7 @@ u_values = [ "0.15", "0.14", "0.14", + "0.13", ], [ "0.18", @@ -464,6 +520,7 @@ u_values = [ "0.13", "0.12", "0.12", + "0.11", ], [ "0.7", @@ -478,6 +535,7 @@ u_values = [ "0.35", "0.30", "0.28", + "0.26", ], [ "0.37", @@ -492,6 +550,7 @@ u_values = [ "0.25", "0.21", "0.21", + "0.20", ], [ "0.25", @@ -506,6 +565,7 @@ u_values = [ "0.19", "0.17", "0.16", + "0.15", ], [ "0.19", @@ -520,6 +580,7 @@ u_values = [ "0.15", "0.14", "0.14", + "0.13", ], [ "0.16", @@ -534,6 +595,7 @@ u_values = [ "0.13", "0.12", "0.12", + "0.11", ], [ "2.5", @@ -548,6 +610,7 @@ u_values = [ "0.35", "0.30", "0.28", + "0.26", ], [ "0.60", @@ -562,6 +625,7 @@ u_values = [ "0.35", "0.30", "0.28", + "0.26", ], [ "2.0", @@ -576,6 +640,7 @@ u_values = [ "0.35", "0.30", "0.28", + "0.26", ], [ "0.60", @@ -590,6 +655,7 @@ u_values = [ "0.25", "0.21", "0.21", + "0.20", ], [ "0.35", @@ -604,6 +670,7 @@ u_values = [ "0.19", "0.17", "0.16", + "0.15", ], [ "0.25", @@ -618,6 +685,7 @@ u_values = [ "0.15", "0.14", "0.14", + "0.13", ], [ "0.18", @@ -632,10 +700,11 @@ u_values = [ "0.13", "0.12", "0.12", + "0.11", ], ] -age_bands = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"] +age_bands = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"] wall_uvalues = [] for i, wall_type in enumerate(wall_types): @@ -645,13 +714,23 @@ for i, wall_type in enumerate(wall_types): 
wall_uvalues.append(row) parkhome_wall_uvalues = [ - {"Wall_type": "Park home as built", "F": "1.7", "G": "1.2", "I": "0.7", "K": "0.6"}, + { + "Wall_type": "Park home as built", + "F": "1.7", + "G": "1.2", + "I": "0.7", + "K": "0.6", + "L": "0.6", + "M": "0.6", + }, { "Wall_type": "Park home with additional insulation", "F": "s1.1.2", "G": "s1.1.2", "I": "s1.1.2", "K": "s1.1.2", + "L": "s1.1.2", + "M": "s1.1.2", }, ] @@ -713,14 +792,13 @@ epc_wall_description_map = { # Granite/whinstone wall mappings ############################ "Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built", - "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", - "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " - "insulation", - "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal " - "insulation", - "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", + "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whin, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", + "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whin, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whin, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", ############################ # System built wall mappings ############################ diff 
--git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 602684cf..7c39668a 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -205,7 +205,7 @@ def get_wall_u_value( mapped_value = wall_uvalues_df[ wall_uvalues_df["Wall_type"] == mapped_description - ][age_band].values[0] + ][age_band].values[0] if pd.isnull(mapped_value) and "Park home" in mapped_description: # We don't know enough in this case so we default to 0 @@ -563,7 +563,7 @@ def get_floor_u_value( insulation_lookup = s11[ s11["Age_band"].str.contains(age_band) & s11["Floor_construction"] == floor_type - ] + ] if insulation_lookup.empty: insulation_thickness = 0 else: From 6aefd1eb3c72be741cdae08df3146623bd2f3c20 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 2 Nov 2025 09:44:41 +0000 Subject: [PATCH 028/202] add post sap 10 feature --- etl/epc/DataProcessor.py | 11 +++++++++++ etl/epc/Pipeline.py | 5 ++++- etl/epc/Record.py | 35 ++++++++++++++++++++--------------- etl/epc/settings.py | 5 +++++ 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 682e9e78..5e5d0872 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -4,6 +4,7 @@ import pandas as pd from etl.epc.settings import ( DATA_PROCESSOR_SETTINGS, EARLIEST_EPC_DATE, + POST_SAP10_DATE, # IGNORED_TRANSACTION_TYPES, IGNORED_FLOOR_LEVELS, IGNORED_PROPERTY_TYPES, @@ -159,6 +160,9 @@ class EPCDataProcessor: # colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], # ) + # Create post sap10 flag + self.create_post_sap10_flag() + # When running in newdata mode, cleaning_averages has lower cases so we co-erce back to upper cleaning_averages = self.cleaning_averages.copy() if self.run_mode == "newdata": @@ -175,6 +179,13 @@ class EPCDataProcessor: self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step) self.cast_data_columns_to_lower() + def 
create_post_sap10_flag(self): + """ + Create a flag to indicate if the epc is post sap10 + """ + + self.data["is_post_sap10"] = self.data["LODGEMENT_DATE"] >= POST_SAP10_DATE + def cast_data_columns_to_lower(self): """ Convert all columns names to lower diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index 9f427c59..fac58cd9 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -23,6 +23,7 @@ from etl.epc.settings import ( POTENTIAL_COLUMNS, ROOM_FEATURES, COST_FEATURES, + POST_SAP10_FEATURE, ) # TODO: change in setting file @@ -325,7 +326,9 @@ class EPCPipeline: # We include the lodgement date here as we probably need to factor time into the # model, since EPC standards and rigour have changed over time - variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES] + variable_data = property_data[ + VARIABLE_DATA_FEATURES + COST_FEATURES + POST_SAP10_FEATURE + ] uprn = str(uprn) epc_records = [ diff --git a/etl/epc/Record.py b/etl/epc/Record.py index d0816034..7552a0c4 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -20,6 +20,7 @@ from etl.epc.settings import ( COMPONENT_FEATURES, EFFICIENCY_FEATURES, ROOM_FEATURES, + POST_SAP10_FEATURE, ) from recommendations.recommendation_utils import estimate_number_of_floors from utils.s3 import read_dataframe_from_s3_parquet @@ -89,6 +90,7 @@ class EPCRecord: co2_emissions_current: float = None number_habitable_rooms: float = None number_heated_rooms: float = None + is_post_sap10: bool = None # u_values_walls = None # u_values_roof = None @@ -277,6 +279,7 @@ class EPCRecord: self.number_heated_rooms: float = float( self.prepared_epc["number_heated_rooms"] ) + self.is_post_sap10: bool = bool(self.prepared_epc["is_post_sap10"]) def _identify_delta_between_prepared_and_original_records(self): """ @@ -385,11 +388,11 @@ class EPCRecord: return df def _clean_floor_height(self): - """ Remaps anomalies in floor height to the average floor height for the property type """ + """Remaps anomalies in 
floor height to the average floor height for the property type""" floor_height_data = self.cleaning_data[ - (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) & - (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) - ] + (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) + & (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) + ] average = floor_height_data["floor_height"].mean() sd = floor_height_data["floor_height"].std() # If we're in the top 0.5 percentile of floor heights, we'll set it to the average @@ -399,14 +402,16 @@ class EPCRecord: self.prepared_epc["floor-height"] = average def _clean_new_build_descriptions(self): - for col in ['roof-description', 'walls-description', 'floor-description']: + for col in ["roof-description", "walls-description", "floor-description"]: self.prepared_epc[col] = self.prepared_epc[col].replace("W/m²K", "W/m-¦K") def _clean_constituency(self): """ We handle the single case of finding a missing constituency by using the local authority """ - if pd.isnull(self.prepared_epc["constituency"]) or (self.prepared_epc["constituency"] == ""): + if pd.isnull(self.prepared_epc["constituency"]) or ( + self.prepared_epc["constituency"] == "" + ): if self.prepared_epc["local-authority"] != "E06000044": raise NotImplementedError( "This function is only implemented for Portsmouth, in the single edgecase seen" @@ -595,12 +600,12 @@ class EPCRecord: # We handle the edge case of floor area being 0. 
We set it to zero and it is cleaned by # _clean_with_data_processor - if self.prepared_epc['total-floor-area'] == 0: + if self.prepared_epc["total-floor-area"] == 0: print( "Edge case of floor area being zero - will set to none and will be cleaned in " "_clean_with_data_processor" ) - self.prepared_epc['total-floor-area'] = None + self.prepared_epc["total-floor-area"] = None def _clean_mains_gas(self): """ @@ -609,12 +614,7 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - mains_gas_map = { - "Y": True, - "N": False, - True: True, - False: False - } + mains_gas_map = {"Y": True, "N": False, True: True, False: False} self.prepared_epc["mains-gas-flag"] = ( None @@ -1064,7 +1064,12 @@ class EPCDifferenceRecord: CARBON_RESPONSE ) - component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES + ROOM_FEATURES + component_variables = ( + COMPONENT_FEATURES + + EFFICIENCY_FEATURES + + ROOM_FEATURES + + POST_SAP10_FEATURE + ) ending_record = self.record2.get( component_variables + ["lodgement_date"], return_asdict=True, diff --git a/etl/epc/settings.py b/etl/epc/settings.py index ecc56552..47a75def 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -52,6 +52,9 @@ DATA_ANOMALY_MATCHES = { "Unknown", } +# Add the post_sap10 date to indicate if the epc is post sap10 +POST_SAP10_DATE = "2025-06-22" + DATA_ANOMALY_SUBSTRINGS = { # Where values in a ‘pick’ list that have been superseded by another value. For example, where a value for # ‘pitched roof’ has been replaced by three sub-categories of pitched roof. 
The original value is retained @@ -184,6 +187,8 @@ EFFICIENCY_FEATURES = [ ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"] +POST_SAP10_FEATURE = ["is_post_sap10"] + COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [ "TRANSACTION_TYPE", "ENERGY_TARIFF", # Not sure if this is relevant From 76716f35d3c54eec78509fe62e60e3bd8d7eb83e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 3 Nov 2025 14:44:36 +0000 Subject: [PATCH 029/202] added some basic level of override so we force solar recommendations if we have inspections --- backend/apis/GoogleSolarApi.py | 62 ++++++++++++++++++++++++++++++---- backend/app/plan/utils.py | 2 +- backend/engine/engine.py | 19 ++++++++--- 3 files changed, 72 insertions(+), 11 deletions(-) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index a8982061..dcf08fb5 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -479,9 +479,7 @@ class GoogleSolarApi: roi_results = pd.DataFrame(roi_results) - panel_performance = panel_performance.merge( - roi_results, how="left", on="n_panels" - ) + panel_performance = panel_performance.merge(roi_results, how="left", on="n_panels") # We want max roi, minimal generation deficit, and max generation value - we create a ranking score # Assign equal weights to each metric @@ -742,7 +740,7 @@ class GoogleSolarApi: @classmethod def building_solar_analysis( cls, building_solar_config: List, input_properties: List[Property], session, google_solar_api_key: str, - solar_materials: list + solar_materials: list, ): """ Perform the solar analysis for the building level @@ -826,9 +824,21 @@ class GoogleSolarApi: @classmethod def unit_solar_analysis( cls, unit_solar_config: List, input_properties: List[Property], session, body, google_solar_api_key: str, - solar_materials: list + solar_materials: list, inspections_map: dict ): + """ + Perform the solar analysis for the unit level + :param unit_solar_config: List of unit solar 
configurations + :param input_properties: List of properties + :param session: Database session + :param body: PlanTriggerRequest instance + :param google_solar_api_key: Google Solar API key + :param solar_materials: List of solar materials + :param inspections_map: Dictionary mapping property IDs to inspection data + :return: + """ + if not unit_solar_config: return input_properties @@ -879,6 +889,15 @@ class GoogleSolarApi: property_instance=property_instance, ) + property_inspections = inspections_map.get(property_instance.id, {}) + + if property_inspections: + # If we have some inspections data, we check if we have some data which indicates solar cannot + # be installed. We're loose about this now since this is post review + if solar_api_client.panel_performance.empty: + # We assume solar is a suitable option + solar_api_client.panel_performance = solar_api_client.default_panel_performance(property_instance) + # Store the data in the database solar_api_client.save_to_db( session=session, @@ -923,12 +942,43 @@ class GoogleSolarApi: None ) - if material_1_6 is None or material_3_2 is None: + material_4_35 = next( + (m for m in self.solar_materials if m["type"] == "solar_pv" and + abs(m["size"] - 4.35) < 0.1 and not m["includes_battery"]), + None + ) + + if material_1_6 is None or material_3_2 is None or material_4_35 is None: raise ValueError("No suitable solar product found for the default configuration.") # We return a 1.6 and 3.2 kwp system panel_performance = pd.DataFrame( [ + { + 'n_panels': 10, + 'yearly_dc_energy': 4350 * assumptions.MEDIAN_WATTAGE_TO_DC, + 'total_cost': cost_instance.solar_pv( + solar_product=material_4_35, + scaffolding_options=[ + {"total_cost": 1000, "size": property_instance.number_of_floors}, + {"total_cost": 1000, "size": 3} + ], + n_floors=property_instance.number_of_floors + )["total"], + 'weighted_ratio': None, + 'panneled_roof_area': 9 * assumptions.RDSAP_AREA_PER_PANEL, + 'array_wattage': 4350, + 'initial_ac_kwh_per_year': 4350 
* assumptions.MEDIAN_WATTAGE_TO_AC, + 'lifetime_ac_kwh': None, + 'lifetime_dc_kwh': None, + 'roi': None, + 'generation_value': None, + 'generation_deficit': None, + 'expected_payback_years': None, + 'surplus': None, + 'combined_score': None, + 'rank': None + }, { 'n_panels': 8, 'yearly_dc_energy': 3200 * assumptions.MEDIAN_WATTAGE_TO_DC, diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index fe995935..4ebb41f8 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -139,7 +139,7 @@ def parse_eco_packages(config: dict[str, Any]) -> tuple[list[str], int, str] | t "plan_type": "solar_eco4" }, "Solar Eligible, Needs Heating Upgrade": { - "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heater"], + "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heater", "mechanical_ventilation"], "target_sap": 86, # High B "plan_type": "solar_hhrsh_eco4" }, diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 0cb9d860..175d12a0 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -400,8 +400,13 @@ async def model_engine(body: PlanTriggerRequest): plan_input = plan_input.rename( columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} ) - # Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remote UPRN - plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"]) + # Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remove UPRN + # This will be reflexted + plan_input["uprn"] = np.where( + plan_input["estimated"].isin([1, True]) & ( + (plan_input["uprn"] < 0) | pd.isnull(plan_input["uprn"]) + ), None, plan_input["uprn"] + ) # We handle the landlord property type and built form plan_input["property_type"] = plan_input["landlord_property_type"].copy() if "landlord_built_form" in plan_input.columns: @@ -512,7 +517,9 @@ async def 
model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True) - if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list": + if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list" and ( + epc_searcher.newest_epc["uprn"] < 0 + ): epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED # We check for an energy assessment we have performed on this property: @@ -678,7 +685,7 @@ async def model_engine(body: PlanTriggerRequest): input_properties=input_properties, session=session, google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, - solar_materials=[m for m in materials if m["type"] == "solar_pv"] + solar_materials=[m for m in materials if m["type"] == "solar_pv"], ) input_properties = GoogleSolarApi.unit_solar_analysis( @@ -688,8 +695,12 @@ async def model_engine(body: PlanTriggerRequest): body=body, solar_materials=[m for m in materials if m["type"] == "solar_pv"], google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, + inspections_map=inspections_map ) + # We also make a tweak - if the property has been flagged for solar but doesn't contain + # any panel performance, we ensure that we have a 3kWp and 4kWp option for the property + logger.info("Identifying property recommendations") recommendations = {} recommendations_scoring_data = [] From 42e447e41656e7295d9862c89c22893960b97f5c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 3 Nov 2025 18:50:36 +0000 Subject: [PATCH 030/202] preparing pre-flight requests --- backend/app/plan/router.py | 12 ++++++++++++ backend/ml_models/api.py | 11 ++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index b897dbda..8c502021 100644 --- a/backend/app/plan/router.py +++ 
b/backend/app/plan/router.py @@ -43,6 +43,18 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): logger.error("Failed to parse request body: %s", e) return {"message": "Invalid request"}, 400 + # TODO: Warm up the lambdas here + # from backend.ml_models.api import ModelApi + # model_api = ModelApi( + # portfolio_id=body.portfolio_id, + # timestamp="2020-01-01T00:00:00", + # prediction_buckets=[], + # max_retries=1 + # ) + # await model_api.async_warm_up_lambdas( + # model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES + # ) + # If file_format is domna_asset_list and type is xlsx, read and chunk it if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx": try: diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index 8d1dbeea..84ba021f 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -257,8 +257,10 @@ class ModelApi: model_prefixes = self.MODEL_PREFIXES if model_prefies is None else model_prefies session = self.get_aiohttp_session() tasks = [ - self._send_warm_up_request(session, f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict", - model_prefix) + self._send_warm_up_request( + session, f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict", + model_prefix + ) for model_prefix in model_prefixes ] await asyncio.gather(*tasks, return_exceptions=True) @@ -271,7 +273,10 @@ class ModelApi: "file_location": "s3://warm-up-placeholder", "portfolio_id": 0, "property_id": "", - "created_at": "2020-01-01T00:00:00" + "created_at": "2020-01-01T00:00:00", + "warm": True + # The presence of this key will send the api down a specific warm up route, to call + # prediction and load the font manager, because that is a key bottleneck for cold starts } async with session.post(url, json=json_payload, timeout=10) as response: text = await response.text() From 8a8389a4bbda12dd64f00daec7203ed2e26eafa3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 3 Nov 2025 22:18:17 
+0000 Subject: [PATCH 031/202] handling already installed ashp and removing loft only eco4 packaged --- backend/Property.py | 2 +- .../db/functions/recommendations_functions.py | 9 +++++ backend/app/plan/data_classes.py | 2 +- backend/app/plan/utils.py | 33 ++++++++++++++++--- backend/engine/engine.py | 15 ++++++--- recommendations/optimiser/CostOptimiser.py | 6 ++-- .../optimiser/funding_optimiser.py | 26 ++++++++++++--- .../optimiser/optimiser_functions.py | 1 + 8 files changed, 77 insertions(+), 17 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f320f066..8ea6749b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -107,7 +107,7 @@ class Property: # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the # cost and instead, provide a message that the measure has already been installed - self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] + self.already_installed = already_installed self.non_invasive_recommendations = ( non_invasive_recommendations['recommendations'] if non_invasive_recommendations else [] diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index f42f66e1..8c6e710a 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -8,6 +8,7 @@ from backend.app.db.models.portfolio import ( PropertyModel, PropertyTargetsModel, PropertyDetailsEpcModel ) from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage +from backend.app.db.models.inspections import InspectionModel def create_plan(session: Session, plan): @@ -210,6 +211,14 @@ def clear_portfolio(session: Session, portfolio_id: int): # Delete all Recommendations associated with the properties session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids))) + 
session.execute( + delete(InspectionModel) + .where(InspectionModel.property_id.in_( + session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id) + )) + .execution_options(synchronize_session=False) + ) + # Now, delete the PropertyModels and related details # Delete PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel, and PropertyModel session.execute(delete(PropertyTargetsModel).where(PropertyTargetsModel.portfolio_id == portfolio_id)) diff --git a/backend/app/plan/data_classes.py b/backend/app/plan/data_classes.py index 5314aab0..cec5ed11 100644 --- a/backend/app/plan/data_classes.py +++ b/backend/app/plan/data_classes.py @@ -5,6 +5,6 @@ from typing import Any, Optional @dataclass class PropertyRequestData: patch: dict - already_installed: dict + already_installed: list non_invasive_recommendations: dict valuation: Optional[float] diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 4ebb41f8..c0ffad4a 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -3,6 +3,10 @@ from utils.s3 import read_from_s3 from backend.app.config import get_settings from backend.app.plan.data_classes import PropertyRequestData from typing import Any +from starlette.responses import Response +from utils.logger import setup_logger + +logger = setup_logger() def get_cleaned(): @@ -59,7 +63,7 @@ def extract_property_request_data( property_already_installed = next(( x for x in already_installed if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) + ), []) # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn @@ -118,11 +122,16 @@ def extract_property_request_data( ) -def parse_eco_packages(config: dict[str, Any]) -> tuple[list[str], int, str] | tuple[None, None, None]: +def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[ + None, None, 
None, list]: solar_identification = config.get("solar_reason", None) cavity_identification = config.get("cavity_reason", None) if not solar_identification and not cavity_identification: - return None, None, None + return None, None, None, [] + + landlord_heating_system = config["landlord_heating_system"] + # This is the initial version of tackling "already installed" measures + already_installed = ["air_source_heat_pump"] if landlord_heating_system == "air source heat pump" else [] # We map the categories to the desired measures and upgrade targets # We note that the categories are placeholder until we move the standardised asset list @@ -180,7 +189,23 @@ def parse_eco_packages(config: dict[str, Any]) -> tuple[list[str], int, str] | t _key = cavity_identification.split(":")[0] mapped = identification_map[_key] - return mapped["measures"], mapped["target_sap"], mapped["plan_type"] + measures = mapped["measures"] + + # If we have already installed an ASHP, we adjust the measures + if "air_source_heat_pump" in already_installed: + if "high_heat_retention_storage_heater" in measures: + # If we have a HHRSH already, we remove it + measures.remove("high_heat_retention_storage_heater") + # Add in ASHP (replacing HHRSH if already had) + measures.append("air_source_heat_pump") + + current_sap = prepared_epc.current_energy_efficiency + # If we have a solar package, and the property is a D or above, we don't need to do lofts + if "solar_eco4" in mapped["plan_type"] and current_sap >= 55: + if "loft_insulation" in measures: + measures.remove("loft_insulation") + + return measures, mapped["target_sap"], mapped["plan_type"], already_installed def handle_error(session, msg, status=500): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 175d12a0..285e6d5d 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -594,6 +594,9 @@ async def model_engine(body: PlanTriggerRequest): cleaning_data=cleaning_data, ) + # If we have an ECO project, we 
parse the cavity/solar reasons + eco_packages[property_id] = parse_eco_packages(config, prepared_epc) + input_properties.append( Property( id=property_id, @@ -601,7 +604,7 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=req_data.already_installed, + already_installed=req_data.already_installed + eco_packages[property_id][3], property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, @@ -609,9 +612,6 @@ async def model_engine(body: PlanTriggerRequest): ) ) - # If we have an ECO project, we parse the cavity/solar reasons - eco_packages[property_id] = parse_eco_packages(config) - # Final step - extract inspections data, if we have it property_inspections = extract_inspection_data(config) if property_inspections: @@ -890,6 +890,13 @@ async def model_engine(body: PlanTriggerRequest): mainheat_energy_eff=p.data["mainheat-energy-eff"], ) + if r["already_installed"]: + # if already installed, we zero out the uplift and funding + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = ( + 0, 0, 0, 0 + ) + input_measures = optimiser_functions.prepare_input_measures( measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True ) diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index b01d28b3..8f030123 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -34,11 +34,11 @@ class CostOptimiser: if min_gain == 0: return min_gain elif min_gain <= 5: - return min_gain + 0.5 + return min_gain + 0.25 elif min_gain <= 20: - return min_gain + 1.5 + return min_gain + 0.5 else: - return min_gain + 2 + return min_gain + 0.75 def setup(self): # Initialize Model diff --git 
a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 5e945b56..bf0e1b68 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -222,7 +222,8 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin "path": {"reference": "unfunded:all"}, "scheme": "none", "is_eligible": False, # no funding scheme applied - "unfunded_items": [] + "unfunded_items": [], + "already_installed_gain": sum([x["gain"] for x in picked if x["already_installed"]]) }) # This function will filter down on innovation measures if we are social EPC D @@ -264,6 +265,11 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin if not sub_measures: continue + # If the only measure is loft insulation, we skip this because you cannot do a minor measure only (LI) + # under ECO4 + if len(sub_measures) == 1 and sub_measures[0][0]["type"] in ["loft_insulation"]: + continue + picked, sub_cost, sub_gain = run_optimizer( sub_measures, budget=budget, # no fixed items; budget unchanged @@ -275,6 +281,9 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin scheme = _path_scheme([path_spec]) + # We sum of gain, for already installed measures + already_installed_gain = sum([x["gain"] for x in picked if x["already_installed"]]) + solutions.append( { "fixed_ids": [], @@ -283,8 +292,11 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin "total_gain": sub_gain, "path": path_spec, "scheme": scheme, - "is_eligible": _is_eligible_funding_package(scheme, p.data["current-energy-efficiency"], sub_gain), - "unfunded_items": [] + "is_eligible": _is_eligible_funding_package( + scheme, float(p.data["current-energy-efficiency"]), sub_gain + ), + "unfunded_items": [], + "already_installed_gain": already_installed_gain } ) @@ -409,6 +421,9 @@ def optimise_with_funding_paths(p, input_measures, housing_type, 
funding: Fundin total_cost += unfunded_cost total_gain += unfunded_gain + # We now grab the "already installed gain" + already_installed_gain = sum([x["gain"] for x in total_picks if x["already_installed"]]) + solutions.append({ "fixed_ids": fixed_ids, "items": total_picks, @@ -420,6 +435,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin scheme, int(p.data["current-energy-efficiency"]), total_gain ), "unfunded_items": unfunded_picked, + "already_installed_gain": already_installed_gain }) solutions = pd.DataFrame(solutions) @@ -437,7 +453,9 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin solutions["starting_sap"] = int(p.data["current-energy-efficiency"]) solutions["floor_area"] = p.floor_area solutions["ending_sap"] = solutions["starting_sap"] + solutions["total_gain"] - solutions["starting_band"] = solutions["starting_sap"].apply(funding.get_sap_band) + solutions["starting_band"] = (solutions["starting_sap"] + solutions["already_installed_gain"]).apply( + funding.get_sap_band + ) solutions["ending_band"] = solutions["ending_sap"].apply(funding.get_sap_band) solutions["floor_area_band"] = solutions["floor_area"].apply(funding.get_floor_area_band) solutions["project_score"] = solutions.apply( diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 3a839dff..4812bc63 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -120,6 +120,7 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu "partial_project_funding": rec["partial_project_funding"], "partial_project_score": rec["partial_project_score"], "uplift_project_score": rec["uplift_project_score"], + "already_installed": rec.get("already_installed", False), } ) From 1edd75c91382d300d5b0340360771d9eedbc74ec Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 4 Nov 2025 10:53:22 
+0000 Subject: [PATCH 032/202] debugged the solar missing data to push to database --- backend/apis/GoogleSolarApi.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index dcf08fb5..00fb98d8 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -1031,4 +1031,22 @@ class GoogleSolarApi: }, ] ) + + # We add the key elements that are required for the database + panel_performance['lifetime_ac_kwh'] = panel_performance.apply( + self.lifetime_production_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + installation_life_span=self.installation_life_span, + column_name="initial_ac_kwh_per_year" + ) + + panel_performance['lifetime_dc_kwh'] = panel_performance.apply( + self.lifetime_production_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + installation_life_span=self.installation_life_span, + column_name="yearly_dc_energy", + ) + return panel_performance From b9a60e10d17512a3cbc8ff8d4c4c46a3f477cef2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 4 Nov 2025 20:55:01 +0000 Subject: [PATCH 033/202] debugging backend --- backend/Funding.py | 2 +- backend/Property.py | 17 ++- backend/app/plan/schemas.py | 4 +- backend/app/plan/utils.py | 11 +- backend/engine/engine.py | 21 ++-- backend/ml_models/api.py | 15 +-- .../test_data/innovation_measure_fixtures.py | 4 +- backend/tests/test_funding.py | 2 +- .../orbit/funding_example_portfolio.py | 2 +- etl/find_my_epc/RetrieveFindMyEpc.py | 8 +- recommendations/HeatingRecommender.py | 71 +++++++++++- .../optimiser/funding_optimiser.py | 19 ++-- .../optimiser/optimiser_functions.py | 19 +++- .../test_data/heating_recommendations_data.py | 44 ++++---- .../tests/test_heating_recommendations.py | 105 ++++++++++++++++++ 15 files changed, 275 insertions(+), 69 deletions(-) diff --git a/backend/Funding.py b/backend/Funding.py index 
ece8e3cf..be3db0d4 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -809,7 +809,7 @@ class Funding: if not has_eligibile_heating: # We check if there is a recommendation for an ASHP or HHRSH if ("air_source_heat_pump" not in measure_types) and ( - "high_heat_retention_storage_heater" not in measure_types): + "high_heat_retention_storage_heaters" not in measure_types): return True, False, True # 2) We check if there is a wall insulation measure for this property. If so, we make sure diff --git a/backend/Property.py b/backend/Property.py index 8ea6749b..23e885d1 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -87,6 +87,7 @@ class Property: measures=None, energy_assessment=None, is_new=True, + inspections=None, **kwargs ): @@ -210,6 +211,9 @@ class Property: self.energy_assessment_condition_data = energy_assessment["condition"] self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"] + # Store inspections + self.inspections = inspections + # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data self.parse_kwargs(kwargs) @@ -1296,9 +1300,16 @@ class Property: self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"] ) # If there is no existing solar PV, the photo-supply field will be None or a missing value - has_no_existing_solar_pv = self.data["photo-supply"] in [ - None, 0, self.DATA_ANOMALY_MATCHES - ] + + # We use inspections data to tell us this + if self.inspections: + has_no_existing_solar_pv = self.inspections.roof_orientation.value not in [ + "already has solar pv", "roof too small", "no roof" + ] + else: + has_no_existing_solar_pv = self.data["photo-supply"] in [ + None, 0, self.DATA_ANOMALY_MATCHES + ] return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 6fac54ad..858a0a35 100644 --- a/backend/app/plan/schemas.py +++ 
b/backend/app/plan/schemas.py @@ -17,7 +17,7 @@ ECO4_ELIGIBILE_FABRIC_MEASURES = [ "suspended_floor_insulation", "solid_floor_insulation", "double_glazing", "secondary_glazing" ] ECO4_ELIGIBLE_HEATING_MEASURES = [ - "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump", "solar_pv" + "boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump", "solar_pv" ] SPECIFIC_MEASURES = ( @@ -48,7 +48,7 @@ MEASURE_MAP = { ], "roof_insulation": ["loft_insulation", "flat_roof_insulation", "room_roof_insulation"], "floor_insulation": ["suspended_floor_insulation", "solid_floor_insulation"], - "heating": ["boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump"], + "heating": ["boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump"], "windows": ["double_glazing", "secondary_glazing"], "heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"] } diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index c0ffad4a..67b7bce1 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -131,7 +131,9 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], landlord_heating_system = config["landlord_heating_system"] # This is the initial version of tackling "already installed" measures - already_installed = ["air_source_heat_pump"] if landlord_heating_system == "air source heat pump" else [] + already_installed = [] + if landlord_heating_system == "air source heat pump": + already_installed.append("air_source_heat_pump") # We map the categories to the desired measures and upgrade targets # We note that the categories are placeholder until we move the standardised asset list @@ -148,7 +150,8 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], "plan_type": "solar_eco4" }, "Solar Eligible, Needs Heating Upgrade": { - "measures": ["solar_pv", "loft_insulation", 
"high_heat_retention_storage_heater", "mechanical_ventilation"], + "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters", + "mechanical_ventilation"], "target_sap": 86, # High B "plan_type": "solar_hhrsh_eco4" }, @@ -193,9 +196,9 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], # If we have already installed an ASHP, we adjust the measures if "air_source_heat_pump" in already_installed: - if "high_heat_retention_storage_heater" in measures: + if "high_heat_retention_storage_heaters" in measures: # If we have a HHRSH already, we remove it - measures.remove("high_heat_retention_storage_heater") + measures.remove("high_heat_retention_storage_heaters") # Add in ASHP (replacing HHRSH if already had) measures.append("air_source_heat_pump") diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 285e6d5d..271effab 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -597,6 +597,11 @@ async def model_engine(body: PlanTriggerRequest): # If we have an ECO project, we parse the cavity/solar reasons eco_packages[property_id] = parse_eco_packages(config, prepared_epc) + # Final step - extract inspections data, if we have it - we inject into property for usage + property_inspections = extract_inspection_data(config) + if property_inspections: + inspections_map[property_id] = property_inspections + input_properties.append( Property( id=property_id, @@ -608,15 +613,11 @@ async def model_engine(body: PlanTriggerRequest): property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, + inspections=inspections_map.get(property_id), **Property.extract_kwargs(config), # TODO: Depraecate this ) ) - # Final step - extract inspections data, if we have it - property_inspections = extract_inspection_data(config) - if property_inspections: - inspections_map[property_id] = property_inspections - if not 
input_properties: return Response(status_code=204) @@ -898,7 +899,8 @@ async def model_engine(body: PlanTriggerRequest): ) input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True + measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True, + property_eco_packages=eco_packages.get(p.id) ) # When the goal is Increasing EPC, we can run the funding optimiser @@ -929,6 +931,11 @@ async def model_engine(body: PlanTriggerRequest): solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) + # If the solution isn't eligible, we can't really consider it + solutions = solutions[ + (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") + ] + if solutions["meets_upgrade_target"].any(): # If we have a solution that meets the upgrade target, we select that one optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] @@ -940,7 +947,7 @@ async def model_engine(body: PlanTriggerRequest): scheme = optimal_solution["scheme"] funded_measures = optimal_solution["items"] if scheme != "none" else [] solution = optimal_solution["items"] + optimal_solution["unfunded_items"] - # This is the total amount of funding that the project will produce (including uplifts) (£) + # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ optimal_solution["partial_project_funding"] # This is the total amount of funding associated to the uplift (£) diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index 84ba021f..7f3e5873 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -44,7 +44,7 @@ class ModelApi: self.timestamp = timestamp self.prediction_buckets = prediction_buckets self.max_retries = max_retries - self.semaphore = asyncio.Semaphore(2) + self.semaphore = 
asyncio.Semaphore(3) @staticmethod def get_aiohttp_session(): @@ -117,7 +117,7 @@ class ModelApi: } async with self.semaphore: - await asyncio.sleep(random.uniform(0.3, 1.2)) + # await asyncio.sleep(random.uniform(0.3, 1.2)) try: async with session.post(url, json=payload, headers=headers, timeout=120) as response: if response.status != 200: @@ -211,13 +211,14 @@ class ModelApi: response = await self.predict_async(f"s3://{bucket}/" + file_location, model_prefix, session=session) return model_prefix, response - results = [] - for coro in asyncio.as_completed([run_model(mp) for mp in model_prefixes]): - result = await coro - results.append(result) + # Run all model calls concurrently + results = await asyncio.gather( + *(run_model(mp) for mp in model_prefixes), + return_exceptions=True + ) for model_prefix, response in results: - if response: + if response and not isinstance(response, Exception): predictions_bucket = self.prediction_buckets[model_prefix] predictions_df = pd.DataFrame( read_dataframe_from_s3_parquet( diff --git a/backend/tests/test_data/innovation_measure_fixtures.py b/backend/tests/test_data/innovation_measure_fixtures.py index a66cc7ec..51f8e3ee 100644 --- a/backend/tests/test_data/innovation_measure_fixtures.py +++ b/backend/tests/test_data/innovation_measure_fixtures.py @@ -30,7 +30,7 @@ innovation_scenarios = [ "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} + {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -45,7 +45,7 @@ innovation_scenarios = [ "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", 
"is_innovation": False, "innovation_uplift": 0} + {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py index 513c3271..8646ab27 100644 --- a/backend/tests/test_funding.py +++ b/backend/tests/test_funding.py @@ -477,7 +477,7 @@ def test_eco4_sh_epc_d_requires_innovation( measures5 = [ {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} + {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0} ] funding5.check_funding( measures=measures5, diff --git a/etl/customers/orbit/funding_example_portfolio.py b/etl/customers/orbit/funding_example_portfolio.py index cf0e151f..c1ade44d 100644 --- a/etl/customers/orbit/funding_example_portfolio.py +++ b/etl/customers/orbit/funding_example_portfolio.py @@ -114,7 +114,7 @@ def app(): "lighting", "secondary_heating", "boiler_upgrade", - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", ], "budget": None, } diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 5bb5e39b..4a9cf24d 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -606,7 +606,7 @@ class RetrieveFindMyEpc: "roomstat_programmer_trvs", "time_temperature_zone_control" ], "Change heating to gas condensing boiler": ["boiler_upgrade"], - "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heater"], + "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heaters"], "Flat roof or sloping ceiling insulation": ["flat_roof_insulation"], "Heating controls (room thermostat)": [ "roomstat_programmer_trvs", "time_temperature_zone_control" @@ -634,7 +634,7 
@@ class RetrieveFindMyEpc: "PV Cells recommendation": [], "Replacement glazing units": ["double_glazing"], "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"], - "High heat retention storage heaters": ["high_heat_retention_storage_heater"], + "High heat retention storage heaters": ["high_heat_retention_storage_heaters"], "Gas condensing boiler": ["boiler_upgrade"], "Change room heaters to condensing boiler": ["boiler_upgrade"], "Cylinder thermostat": ["cylinder_thermostat"], @@ -677,10 +677,10 @@ class RetrieveFindMyEpc: ], "Internal wall insulation": ["internal_wall_insulation"], "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [ - "high_heat_retention_storage_heater" + "high_heat_retention_storage_heaters" ], "High heat retention storage heaters and dual rate meter": [ - "high_heat_retention_storage_heater" + "high_heat_retention_storage_heaters" ], "Increase loft insulation to 250mm": ["loft_insulation"], "Solar photovoltaics panels, 25% of roof area": ["solar_pv"], diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 41785104..b8a1b5a7 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -40,7 +40,7 @@ class HeatingRecommender: # type 1 "boiler_upgrade", # type 2 - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", ] } }, @@ -156,7 +156,7 @@ class HeatingRecommender: return ( hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and not self.has_gshp and - ("high_heat_retention_storage_heater" in measures) + ("high_heat_retention_storage_heaters" in measures) ) def is_boiler_upgrade_suitable(self, measures, ashp_only_heating_recommendation): @@ -489,6 +489,55 @@ class HeatingRecommender: return heat_pump_size + @staticmethod + def estimate_peak_kw( + floor_area_m2: float, + epc_primary_kwh_per_m2_yr: float | None = None, + # Prefer these if 
available: + space_heat_kwh_per_m2_yr: float | None = None, # from EPC/SAP if you can + heat_loss_parameter_W_per_m2K: float | None = None, # HLP if available + primary_to_delivered_factor: float = 1.0, + space_heat_fraction_range=(0.5, 0.75), + hdd_base_dd: float = 2100.0, # set per location (base 15.5 °C typical UK) + t_indoor_C: float = 21.0, + t_design_ext_C: float = -3.0, + ): + ΔT = t_indoor_C - t_design_ext_C + + # 1) Best available path: HLP → direct peak + if heat_loss_parameter_W_per_m2K is not None: + peak_kw = heat_loss_parameter_W_per_m2K * floor_area_m2 * ΔT / 1000.0 + return (peak_kw, peak_kw) # no range needed + + # 2) Second-best: space-heating demand → HDD method + if space_heat_kwh_per_m2_yr is not None: + annual_space_kwh = space_heat_kwh_per_m2_yr * floor_area_m2 + Htot = annual_space_kwh * 1000.0 / (hdd_base_dd * 24.0) # W/K + peak_kw = Htot * ΔT / 1000.0 + return (peak_kw, peak_kw) + + # 3) Minimal inputs: primary energy + assumed fraction → range + assert epc_primary_kwh_per_m2_yr is not None + annual_primary = epc_primary_kwh_per_m2_yr * floor_area_m2 + annual_delivered = annual_primary / primary_to_delivered_factor + + def to_peak(space_fraction): + annual_space = annual_delivered * space_fraction + Htot = annual_space * 1000.0 / (hdd_base_dd * 24.0) + return Htot * ΔT / 1000.0 + + low = to_peak(space_heat_fraction_range[0]) + high = to_peak(space_heat_fraction_range[1]) + return (low, high) + + @staticmethod + def pick_model(peak_kw_range, models_kw=(5, 6, 8.5, 11.2, 14, 17, 20)): + target = peak_kw_range[1] # cover the upper end + for kw in models_kw: + if kw >= target: + return kw + return None + def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False): """ This method will implement the recommendation for an air source heat pump @@ -504,7 +553,19 @@ class HeatingRecommender: controls_recommender = HeatingControlRecommender(self.property) controls_recommender.recommend(heating_description="Air 
source heat pump, radiators, electric", phase=phase) - ashp_size = self.size_heat_pump() + # ashp_size = self.size_heat_pump() + + # New functions to estimate size of ASHP + estimated_load = self.estimate_peak_kw( + floor_area_m2=self.property.floor_area, + epc_primary_kwh_per_m2_yr=self.property.data["energy-consumption-current"], + primary_to_delivered_factor=1.55, # use 1.13 if heating fuel is gas + space_heat_fraction_range=(0.35, 0.60), + hdd_base_dd=2000.0, # set from location + t_indoor_C=21.0, + t_design_ext_C=-1.0 # set from local CIBSE table + ) + ashp_size = self.pick_model(estimated_load) ashp_costs = self.costs.air_source_heat_pump(ashp_size) if non_intrusive_recommendation: @@ -884,7 +945,7 @@ class HeatingRecommender: # We check if there is a high heat retention non-intrusive recommendation non_intrusive_recommendation = next( (r for r in self.property.non_invasive_recommendations if - r["type"] == "high_heat_retention_storage_heater"), + r["type"] == "high_heat_retention_storage_heaters"), {} ) @@ -981,7 +1042,7 @@ class HeatingRecommender: phase=phase, heating_controls_only=heating_controls_only, system_change=system_change, - system_type="high_heat_retention_storage_heater", + system_type="high_heat_retention_storage_heaters", non_intrusive_recommendation=non_intrusive_recommendation, heating_product=hhrsh_product ) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index bf0e1b68..6a0b1d0c 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -91,7 +91,7 @@ def violates_min_insulation(fixed, optimisation_input_measures): # heating (incl. 
PV) flags is_heating = has_any([ "air_source_heat_pump", - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", "boiler_upgrade", "electric_boiler", "time_temperature_zone_control", @@ -171,7 +171,7 @@ def _prs_solution_ok(items, p, funding): # renewable set: has_ashp = ("air_source_heat_pump" in types) # ASHP alone is renewable has_solar = ("solar_pv" in types) - has_hhrsh = ("high_heat_retention_storage_heater" in types) # only counts *with* solar + has_hhrsh = ("high_heat_retention_storage_heaters" in types) # only counts *with* solar # solar PV qualifies if paired with eligible existing heating solar_ok_existing = has_solar and funding.check_solar_eligible_heating_system( @@ -468,6 +468,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin ) rate = funding.get_eco4_abs_rate(is_cavity=p.walls["is_cavity_wall"]) + # The full project funding, at this point, does NOT include any uplifts solutions["full_project_funding"] = solutions["project_score"] * rate # if the scheme is not ECO4, we set the funding to 0 with iloc solutions.loc[solutions["scheme"] != "eco4", "full_project_funding"] = 0.0 @@ -679,7 +680,7 @@ def parse_types(t): def includes_heating(opt_types): return any(x in opt_types for x in { "air_source_heat_pump", - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", "time_temperature_zone_control", # controls count as a heating measure in your pipeline "solar_pv" # you treat PV as heating for funding logic }) @@ -761,7 +762,7 @@ def _make_solar_heating_funding_paths( # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # We don't include electric boilers as they are not eligible for ECO4 funding solar_heating_combos = [ - ("high_heat_retention_storage_heater", "solar_pv+hhrsh:eco4"), + ("high_heat_retention_storage_heaters", "solar_pv+hhrsh:eco4"), ("air_source_heat_pump", "solar_pv+ashp:eco4"), ] if _find_measure(input_measures, "solar_pv"): @@ -790,11 +791,11 @@ def 
_make_solar_heating_funding_paths( single_heating_measures = ["air_source_heat_pump"] else: single_heating_measures = [ - "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump" + "boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump" ] measure_references = { "boiler_upgrade": "boiler_upgrade", - "high_heat_retention_storage_heater": "hhrsh", + "high_heat_retention_storage_heaters": "hhrsh", "air_source_heat_pump": "ashp" } for heating_upgrade in single_heating_measures: @@ -881,14 +882,16 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): if housing_type == "Social" and p.data["current-energy-rating"] == "D": # If the property is currently EPC D, we can only include innovation measures or measures to meet the - # minimum insulation requirements + # minimum insulation requirements. We make an exception if we have a measure that is + # already installed, specifically a heat pump input_measures_innovation = [] input_gbis_measures_innovation = [] for measures in input_measures: group_of_innovation_measures = [] group_of_gbis_innovation_measures = [] for measure in measures: - if measure["innovation_uplift"] or measure["type"] in remaining_insulation_type: + if measure["innovation_uplift"] or measure["type"] in remaining_insulation_type or measure[ + "already_installed"]: group_of_innovation_measures.append(measure) if measure["innovation_uplift"] and measure["type"] in ( diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 4812bc63..4a8c96da 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -6,7 +6,10 @@ from backend.app.utils import epc_to_sap_lower_bound from recommendations.optimiser.CostOptimiser import CostOptimiser -def prepare_input_measures(property_recommendations, goal, needs_ventilation, funding=False): +def prepare_input_measures( + 
property_recommendations, goal, needs_ventilation, funding=False, + property_eco_packages=None +): """ Prepares a nested list of measure options for optimisation. @@ -37,6 +40,9 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu funding: bool, optional If true, the function will include the innovation uplift in the total cost calculation. If false, this is excluded, since innovation uplift cannot be claimed where funding is not available. + property_eco_packages: dict, optional + Eco package data for the property, if available. If a measure has been specified as part of an eco package + (e.g. HHRSH) this function will include that measure in the optimisation, even if it has negative cost savings. Returns ------- @@ -59,6 +65,8 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu {} ) + eco_measures = property_eco_packages[0] if property_eco_packages else [] + input_measures = [] for recs in property_recommendations: @@ -71,7 +79,14 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu recs = [r for r in recs if ~r["has_battery"]] # Only include measures with non-negative cost savings - recs_to_append = [rec for rec in recs if rec["energy_cost_savings"] >= 0] + if eco_measures: + recs_to_append = [ + rec for rec in recs if (rec["energy_cost_savings"] >= 0) or (rec["measure_type"] in eco_measures) + ] + else: + recs_to_append = [ + rec for rec in recs if (rec["energy_cost_savings"] >= 0) + ] if not recs_to_append: continue diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py index f4b4c0a6..37c854c3 100644 --- a/recommendations/tests/test_data/heating_recommendations_data.py +++ b/recommendations/tests/test_data/heating_recommendations_data.py @@ -86,7 +86,7 @@ testing_examples = [ 'uprn-source': 'Address Matched', }, "heating_measure_types": [ - "high_heat_retention_storage_heater", + 
"high_heat_retention_storage_heaters", ], "notes": "This property has electric room heaters and is off gas so a boiler recommendation is not appropriate." "We would expect a high heat retention storage recommendation. The property is a flat and therefore" @@ -134,7 +134,7 @@ testing_examples = [ 'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0, 'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None, }, - "heating_measure_types": ['high_heat_retention_storage_heater', 'air_source_heat_pump'], + "heating_measure_types": ['high_heat_retention_storage_heaters', 'air_source_heat_pump'], "notes": "This test has electric storage heaters with automatic charge control - we recommend hhr storage" "heaters in this case, but because there are already electic storage heaters in place, we " "note, in the description of the recommendation, that this upgrade may be possible by retrofitting" @@ -275,7 +275,7 @@ testing_examples = [ 'uprn': 43088770.0, 'uprn-source': 'Address Matched', }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property is a flat so we don't have an ASHP recommendation. It also doesn't have access to the " "mains and so it can't have a gas boiler. We don't expect any controls recommendations" @@ -370,7 +370,7 @@ testing_examples = [ }, "heating_measure_types": [ 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'boiler_upgrade' ], "notes": "This property has assumed electric heating and is mid-terrace house. It has a mains gas connection." 
@@ -416,7 +416,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has an oil boiler and doesn't have a mains gas connection so we can only recommend" "an air source heat pump and HHR (since if the home has a non-gas boiler, we recommend HHR)" @@ -463,7 +463,7 @@ testing_examples = [ }, "heating_measure_types": [ 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', 'boiler_upgrade' # TTZs ], @@ -512,7 +512,7 @@ testing_examples = [ "heating_measure_types": [ 'boiler_upgrade', 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has assumed electric heaters. Boiler upgrade, HHR are recommended. We don't recommend" "an ASHP off of the bat because it's mid-terrace." @@ -557,7 +557,7 @@ testing_examples = [ }, "heating_measure_types": [ 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'boiler_upgrade' ], "notes": "This has a form of assumed electric heating and has a mains connection so we recommend HHR, boiler" @@ -605,7 +605,7 @@ testing_examples = [ "heating_measure_types": [ 'boiler_upgrade', 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property already has storage heaters with manual charge control. 
The home is mid terrace so" "the ashp is not suitable" @@ -651,7 +651,7 @@ testing_examples = [ 'uprn-source': 'Address Matched', 'sheating-energy-eff': None, 'sheating-env-eff': None }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', ], "notes": "This property has an LFG boiler but it doesn't have a mains gas connection so we can only recommend" @@ -696,7 +696,7 @@ testing_examples = [ 'uprn-source': 'Energy Assessor', 'sheating-energy-eff': None, 'sheating-env-eff': None }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', ], "notes": "This property has electric boilers in place, but does not have a mains connection so we don't " @@ -744,7 +744,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "This property has a dual fuel boiler and no mains gas connection. We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -788,7 +788,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has a coal boiler and no mains gas connection. We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -835,7 +835,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has a smokeless fuel boiler and no mains gas connection. 
We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -880,7 +880,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has a wood pellets boiler and no mains gas connection. We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -925,7 +925,7 @@ testing_examples = [ 'uprn-source': 'Address Matched', 'sheating-energy-eff': None, 'sheating-env-eff': None }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', ], "notes": "This is an end-terrace house, without mains gas connection, so we recommend is HHR & ASHP" @@ -1010,7 +1010,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'time_temperature_zone_control', ], "notes": "This property has dual heating. A boiler and electric storage heaters. The heating is efficient so" @@ -1056,8 +1056,8 @@ testing_examples = [ "heating_measure_types": [ 'air_source_heat_pump', 'boiler_upgrade', - 'boiler_upgrade+high_heat_retention_storage_heater', - 'high_heat_retention_storage_heater', + 'boiler_upgrade+high_heat_retention_storage_heaters', + 'high_heat_retention_storage_heaters', 'time_temperature_zone_control' ], "notes": "This property is a modified version of the previous dual heating property, where we lower the" @@ -1104,7 +1104,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "This property has anthracite heating without mains. 
" "We recommend ASHP and HHR, but no gas condensing boiler" @@ -1151,7 +1151,7 @@ testing_examples = [ "heating_measure_types": [ 'boiler_upgrade', 'boiler_upgrade', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "This property has room heaters with two different fuel sources, so we recommend HHR, ASHP, and a " "boiler upgrade" @@ -1238,7 +1238,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "The property has warm air electricaire heating, so we recommend ASHP and HHR" }, diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py index b18839aa..93acdefa 100644 --- a/recommendations/tests/test_heating_recommendations.py +++ b/recommendations/tests/test_heating_recommendations.py @@ -105,3 +105,108 @@ class TestHeatingRecommendations: {x["measure_type"] for x in recommender.heating_recommendations} == set(test_case["heating_measure_types"]) ) + + +@pytest.mark.parametrize( + "floor_area, epc_primary, expected_band, expected_model", + [ + # Case 1 – Typical pre-2000 house, gas heating + ( + 93.75, + 270.19, + (2.5, 4.6), # expected rough band (low, high) + 5, # chosen model + ), + # Case 2 – Efficient new-build (low EPC energy) + ( + 93.75, + 142.28, + (1.4, 2.4), + 3, # assume 3 or 5 kW model covers this + ), + ], +) +def test_estimate_peak_kw_basic(floor_area, epc_primary, expected_band, expected_model): + """ + Ensure the peak load estimate is within a sensible range and + that the model selection logic picks the correct bracket. 
+ """ + + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=floor_area, + epc_primary_kwh_per_m2_yr=epc_primary, + primary_to_delivered_factor=1.55, # electricity + space_heat_fraction_range=(0.35, 0.60), + hdd_base_dd=2000.0, + t_indoor_C=21.0, + t_design_ext_C=-1.0, + ) + + # Assert range sanity + assert expected_band[0] * 0.8 <= load_band[0] <= expected_band[1] * 1.2 + assert expected_band[0] <= load_band[1] <= expected_band[1] * 1.2 + + # Pick model + model = HeatingRecommender.pick_model(load_band, models_kw=(3, 5, 6, 8.5, 11.2)) + assert model == expected_model + + +def test_estimate_peak_kw_with_hlp(): + """ + Test direct HLP input path (best-quality data). + """ + hlp = 1.5 # W/m²K typical for semi-detached + floor_area = 100 + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=floor_area, + heat_loss_parameter_W_per_m2K=hlp, + t_indoor_C=21, + t_design_ext_C=-2, + ) + # Should return identical low/high values since it's direct + assert isinstance(load_band, tuple) + assert abs(load_band[0] - load_band[1]) < 1e-6 + # Expected peak = 1.5 * 100 * 23 / 1000 = 3.45 kW + assert pytest.approx(load_band[0], rel=0.05) == 3.45 + + +def test_estimate_peak_kw_with_space_heat_demand(): + """ + Test the space-heating-demand path. + """ + floor_area = 120 + space_heat_kwh_m2 = 100 + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=floor_area, + space_heat_kwh_per_m2_yr=space_heat_kwh_m2, + hdd_base_dd=2100, + t_indoor_C=21, + t_design_ext_C=-3, + ) + # Rough expected peak ~ (100*120*1000)/(2100*24) * 24 /1000 = 5.4 kW + assert 4.5 < load_band[0] < 6.0 + assert abs(load_band[0] - load_band[1]) < 1e-6 + + +def test_pick_model_boundaries(): + """ + Ensure pick_model correctly selects the smallest model covering the upper band. 
+ """ + assert HeatingRecommender.pick_model((2.0, 4.9), models_kw=(3, 5, 6, 8.5)) == 5 + assert HeatingRecommender.pick_model((5.0, 5.0), models_kw=(3, 5, 6, 8.5)) == 5 + assert HeatingRecommender.pick_model((5.0, 6.1), models_kw=(3, 5, 6, 8.5)) == 6 + assert HeatingRecommender.pick_model((8.6, 9.0), models_kw=(3, 5, 6, 8.5, 11.2)) == 11.2 + assert HeatingRecommender.pick_model((20, 25), models_kw=(3, 5, 6, 8.5, 11.2)) is None + + +def test_parameter_validation_and_defaults(): + """ + Validate that the function handles missing or minimal parameters properly. + """ + # Minimal path using primary energy only + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=80, + epc_primary_kwh_per_m2_yr=250, + ) + assert isinstance(load_band, tuple) + assert load_band[0] < load_band[1] From cb70cbf1dac7640bbb4ad49c898e9894f194070e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 6 Nov 2025 13:08:19 +0000 Subject: [PATCH 034/202] fixed find my epc fetch bug getting heating control instead of heating system --- backend/engine/engine.py | 1 - etl/find_my_epc/RetrieveFindMyEpc.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 271effab..3b90f623 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -663,7 +663,6 @@ async def model_engine(body: PlanTriggerRequest): input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET) [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties] - # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour # TODO: If we can't get high image quality, should we use the solar API? 
Maybe just for semi-detached units with # extensions, since it doesn't seem to do a great job diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 4a9cf24d..b8c24cb8 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -73,6 +73,12 @@ class RetrieveFindMyEpc: def get_feature_row_text(feature_name, index=0): matches = [row for row in rows if row.find("th") and feature_name in row.find("th").text] if len(matches) > index: + # A commonly seen case is when feature_name is Main heating and we want to make sure we get + # main heating and not main heating control + if feature_name == "Main heating": + matches = [ + row for row in matches if row.find("th") and row.find("th").text.strip() == "Main heating" + ] cells = matches[index].find_all("td") description = self.get_text(cells[0]) rating = self.get_text(cells[1]) From 4640fa77bf5e70e2860b63cfe2470c009a6d8144 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 6 Nov 2025 17:36:02 +0000 Subject: [PATCH 035/202] handing HHRSH heating upgrade outside of ECO project --- backend/Property.py | 3 +- backend/app/assumptions.py | 3 +- backend/engine/engine.py | 18 +++++-- .../optimiser/funding_optimiser.py | 53 ++++++++++++++++--- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 23e885d1..609a9d75 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1302,7 +1302,8 @@ class Property: # If there is no existing solar PV, the photo-supply field will be None or a missing value # We use inspections data to tell us this - if self.inspections: + + if getattr(self.inspections, "roof_orientation", None): has_no_existing_solar_pv = self.inspections.roof_orientation.value not in [ "already has solar pv", "roof too small", "no roof" ] diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index a0234f75..37d9164e 100644 --- a/backend/app/assumptions.py +++ 
b/backend/app/assumptions.py @@ -77,7 +77,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Electric ceiling heating, electric": {"fuel": "Electricity", "cop": 1}, "Air source heat pump, warm air, electric": { "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100 - } + }, + "Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 3b90f623..fc620388 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -911,7 +911,8 @@ async def model_engine(body: PlanTriggerRequest): housing_type=body.housing_type, budget=body.budget, target_gain=gain, - funding=funding + funding=funding, + work_package=eco_packages[p.id][2] ) # Given the solutions we select the optimal one @@ -944,8 +945,19 @@ async def model_engine(body: PlanTriggerRequest): # This is the list of measures that we will recommend scheme = optimal_solution["scheme"] - funded_measures = optimal_solution["items"] if scheme != "none" else [] - solution = optimal_solution["items"] + optimal_solution["unfunded_items"] + + # We create this full list of selected measures, which is used in the next section for setting + # default measures + solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) + funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] + unfunded_measures = deepcopy(optimal_solution["unfunded_items"]) + # If we have an EPC D + HHRSH project, we move HHRSH out of funded measures + if eco_packages.get(p.id)[2] == "solar_hhrsh_eco4" and p.data["current-energy-rating"] == "D": + unfunded_measures.extend( + [x for x in funded_measures if x["type"] == "high_heat_retention_storage_heaters"] + ) + funded_measures = [x for x in funded_measures if x["type"] != "high_heat_retention_storage_heaters"] + # This is the total amount of funding that 
the project will produce (EXCLUDING uplifts) (£) project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ optimal_solution["partial_project_funding"] diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 6a0b1d0c..f9fbbfd6 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -198,7 +198,25 @@ def _ensure_unfunded_costs(groups): return groups -def optimise_with_funding_paths(p, input_measures, housing_type, funding: Funding, budget=None, target_gain=None): +def _get_already_installed_gain(selected_measures, needs_pre_eco_hhrsh_upgrade): + """ + Calculate already installed gain, with special case for pre-ECO4 HHRSH upgrade. + :param selected_measures: List of selected measures + :param needs_pre_eco_hhrsh_upgrade: Boolean indicating if pre-ECO4 HHRSH upgrade is needed + :return: + """ + if needs_pre_eco_hhrsh_upgrade: + return sum( + [x["gain"] for x in selected_measures if + x["already_installed"] or x["type"] == "high_heat_retention_storage_heaters"] + ) + + return sum([x["gain"] for x in selected_measures if x["already_installed"]]) + + +def optimise_with_funding_paths( + p, input_measures, housing_type, funding: Funding, budget=None, target_gain=None, work_package=None +): """ run_optimizer(sub_measures, budget, target_gain) -> (picked_options, sub_cost, sub_gain) """ @@ -227,7 +245,9 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin }) # This function will filter down on innovation measures if we are social EPC D - funding_paths, optimisation_input_measures = make_funding_paths(p, input_measures, housing_type, funding) + funding_paths, optimisation_input_measures = make_funding_paths( + p, input_measures, housing_type, funding, work_package + ) # We now produce a fabric only path for ECO4 # We add in generic insulation funding paths (where there is no fixed measure) @@ -244,6 
+264,10 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin ] + funding_paths ) + needs_pre_eco_hhrsh_upgrade = ( + (p.data["current-energy-rating"] == "D") and work_package == "solar_hhrsh_eco4" + ) + for path_spec in funding_paths: # ECO4 fabric only path = special case @@ -281,8 +305,11 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin scheme = _path_scheme([path_spec]) - # We sum of gain, for already installed measures - already_installed_gain = sum([x["gain"] for x in picked if x["already_installed"]]) + # We sum of gain, for already installed measures. In this, we also include HHRSH, when we have + # an EPC D property that needs HHRSH but HHRSH isn't an eligible measure + already_installed_gain = _get_already_installed_gain( + picked, needs_pre_eco_hhrsh_upgrade + ) solutions.append( { @@ -422,7 +449,11 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin total_gain += unfunded_gain # We now grab the "already installed gain" - already_installed_gain = sum([x["gain"] for x in total_picks if x["already_installed"]]) + # We sum of gain, for already installed measures. In this, we also include HHRSH, when we have + # an EPC D property that needs HHRSH but HHRSH isn't an eligible measure + already_installed_gain = _get_already_installed_gain( + total_picks, needs_pre_eco_hhrsh_upgrade + ) solutions.append({ "fixed_ids": fixed_ids, @@ -837,7 +868,7 @@ def _make_generic_gbis_funding_paths(input_gbis_measures, funding_paths): return funding_paths + gbis_funding_paths -def make_funding_paths(p, input_measures, housing_type, funding: Funding): +def make_funding_paths(p, input_measures, housing_type, funding: Funding, work_package=None): """ This function generates funding paths based on the input measures and the tenure of the property. 
It checks for the presence of specific measures and creates paths that include necessary insulation measures @@ -848,6 +879,8 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): :param input_measures: :param housing_type: :param funding: The funding object that provides methods to check eligibility and calculate funding. + :param work_package: Optional work package information. We handle the case of an EPC D property needing a heating + upgrade, where the heating upgrade needs to be conducted before the solar PV work :return: """ @@ -890,6 +923,12 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): group_of_innovation_measures = [] group_of_gbis_innovation_measures = [] for measure in measures: + + if measure["type"] == "high_heat_retention_storage_heaters" and work_package == "solar_hhrsh_eco4": + # With this work type, if the property is EPC D and doesn't have an eligible heating system + # we install HHRSH as a pre-requisite measure, before the ECO4 project if complete. 
+ group_of_innovation_measures.append(measure) + if measure["innovation_uplift"] or measure["type"] in remaining_insulation_type or measure[ "already_installed"]: group_of_innovation_measures.append(measure) @@ -906,7 +945,7 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): input_gbis_measures_innovation.extend(group_of_gbis_innovation_measures) funding_paths = _make_solar_heating_funding_paths( - p, input_measures_innovation, funding_paths, remaining_insulation_type, housing_type, funding + p, input_measures_innovation, funding_paths, remaining_insulation_type, housing_type, funding, ) # Can only be innovation GBIS measures From 3edf5549af4beb53c6a41b1d3191c3c1101f2489 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 7 Nov 2025 18:42:17 +0000 Subject: [PATCH 036/202] Implementing HHRSH upgrade EPC D projects for ECO4 --- backend/engine/engine.py | 23 --------- .../optimiser/funding_optimiser.py | 49 ++++++++++++++++++- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index fc620388..dafcf01e 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -915,22 +915,6 @@ async def model_engine(body: PlanTriggerRequest): work_package=eco_packages[p.id][2] ) - # Given the solutions we select the optimal one - # 1) If the scheme is ECO4, the full project funding and uplift are deducted from the cost - # 2) If the sheme is GBIS, the partial project funding and uplift are deducted from the cost - # 3) Otherwise, no funding is deducted from the cost - solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "none", - solutions["total_cost"], - np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] - ) - ) - - solutions = 
solutions.sort_values("cost_less_full_project_funding", ascending=True) - # If the solution isn't eligible, we can't really consider it solutions = solutions[ (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") @@ -950,13 +934,6 @@ async def model_engine(body: PlanTriggerRequest): # default measures solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] - unfunded_measures = deepcopy(optimal_solution["unfunded_items"]) - # If we have an EPC D + HHRSH project, we move HHRSH out of funded measures - if eco_packages.get(p.id)[2] == "solar_hhrsh_eco4" and p.data["current-energy-rating"] == "D": - unfunded_measures.extend( - [x for x in funded_measures if x["type"] == "high_heat_retention_storage_heaters"] - ) - funded_measures = [x for x in funded_measures if x["type"] != "high_heat_retention_storage_heaters"] # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index f9fbbfd6..4ac96f00 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -214,6 +214,30 @@ def _get_already_installed_gain(selected_measures, needs_pre_eco_hhrsh_upgrade): return sum([x["gain"] for x in selected_measures if x["already_installed"]]) +def _move_hhrsh_to_unfunded(picked, unfunded_picked, needs_pre_eco_hhrsh_upgrade): + """ + This function handles the case of moving HHRSH to unfunded picks if needed, where we have an ECO4 project + where an unfunded measure needs to be installed first. 
+ :param picked: List of picked measures + :param unfunded_picked: List of unfunded picked measures + :param needs_pre_eco_hhrsh_upgrade: Boolean indicating if pre-ECO4 HHRSH upgrade is needed + :return: + """ + + if not needs_pre_eco_hhrsh_upgrade: + return picked, unfunded_picked + + # We append HHRSH to unfunded items + hhrsh_measure = [x for x in picked if x["type"] == "high_heat_retention_storage_heaters"] + if not hhrsh_measure: + raise ValueError("Expected HHRSH measure to be in total picks") + unfunded_picked += hhrsh_measure + # Remove from total picks + picked = [x for x in picked if x["type"] != "high_heat_retention_storage_heaters"] + + return picked, unfunded_picked + + def optimise_with_funding_paths( p, input_measures, housing_type, funding: Funding, budget=None, target_gain=None, work_package=None ): @@ -310,6 +334,8 @@ def optimise_with_funding_paths( already_installed_gain = _get_already_installed_gain( picked, needs_pre_eco_hhrsh_upgrade ) + # If we need a pre-eco4 HHRSH upgrade, we move HHRSH to unfunded items + picked, unfunded_picked = _move_hhrsh_to_unfunded(picked, [], needs_pre_eco_hhrsh_upgrade) solutions.append( { @@ -322,7 +348,7 @@ def optimise_with_funding_paths( "is_eligible": _is_eligible_funding_package( scheme, float(p.data["current-energy-efficiency"]), sub_gain ), - "unfunded_items": [], + "unfunded_items": unfunded_picked, "already_installed_gain": already_installed_gain } ) @@ -455,6 +481,11 @@ def optimise_with_funding_paths( total_picks, needs_pre_eco_hhrsh_upgrade ) + # If we need a pre-eco4 HHRSH upgrade, we move HHRSH to unfunded items + total_picks, unfunded_picked = _move_hhrsh_to_unfunded( + total_picks, unfunded_picked, needs_pre_eco_hhrsh_upgrade + ) + solutions.append({ "fixed_ids": fixed_ids, "items": total_picks, @@ -510,6 +541,22 @@ def optimise_with_funding_paths( solutions["total_uplift"] = solutions.apply(lambda x: get_total_uplift(x), axis=1) solutions["total_uplift_score"] = solutions.apply(lambda x: 
get_total_innovation_score(x), axis=1) + # Given the solutions we select the optimal one + # 1) If the scheme is ECO4, the full project funding and uplift are deducted from the cost + # 2) If the sheme is GBIS, the partial project funding and uplift are deducted from the cost + # 3) Otherwise, no funding is deducted from the cost + solutions["cost_less_full_project_funding"] = np.where( + solutions["scheme"] == "none", + solutions["total_cost"], + np.where( + solutions["scheme"] == "eco4", + solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], + solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] + ) + ) + + solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) + return solutions From 19a766f442535f3a885128ff97d5de1603891396 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 7 Nov 2025 19:41:42 +0000 Subject: [PATCH 037/202] added restrictions on heating systems only for ESH, fixed bug in funding solutiosn --- backend/SearchEpc.py | 23 +++++++++++++--- backend/engine/engine.py | 26 ++++++++++++++++--- .../optimiser/funding_optimiser.py | 10 +++---- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 60999e94..82899a81 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -156,6 +156,7 @@ class SearchEpc: size=None, property_type=None, fast=False, + heating_system: [str, None] = None ): """ Address lines 1 and postcode are mandatory fields. 
The other address lines are optional @@ -180,6 +181,9 @@ class SearchEpc: self.house_number = self.get_house_number(self.address1) self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number) + # property attributes + self.heating_system = heating_system + self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES self.client = EpcClient(auth_token=auth_token) @@ -571,7 +575,8 @@ class SearchEpc: lmks_to_drop: list[str] | None = None, built_form: str = "", property_type: str = "", - exclude_old: bool = False + exclude_old: bool = False, + heating_system: [str, None] = None ): """ Fetches and processes EPC data for a given initial postcode, applying successive trimming @@ -591,6 +596,7 @@ class SearchEpc: :param built_form: The 'built-form' value to be used for filtering the EPC data. :param property_type: The 'property-type' value to be used for filtering the EPC data. :param exclude_old: Flag to exclude EPC data older than 10 years. + :param heating_system: Optional heating system type for additional filtering. :return: """ @@ -703,6 +709,11 @@ class SearchEpc: epc_data["property-type"] == estimation_property_type) ] + if heating_system is not None: + epc_data = epc_data[ + epc_data["mainheat-description"] == heating_system + ] + if not epc_data.empty: return epc_data # Return the filtered data if it's not empty @@ -712,7 +723,7 @@ class SearchEpc: # If loop finishes without a valid response, raise an exception raise Exception("Unable to find postcode data after trimming - investigate me") - def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False): + def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False, heating_system=None): """ For a property that does not have an EPC, we retrieve the EPC data for the closest properties and estimate the EPC for the property in question. 
@@ -726,6 +737,8 @@ class SearchEpc: :param lmks_to_drop: This is a list of LMK keys that should be dropped from the estimation process. This is used as an override for testing, to drop EPCs for the property we are testing :param exclude_old: Used to drop any expired EPCs (more than 10 years old) + :param heating_system: The heating system of the property we are estimating, if known. Will aim to filter EPCs + to matching heating systems :return: """ @@ -736,7 +749,8 @@ class SearchEpc: lmks_to_drop=lmks_to_drop, built_form=built_form, property_type=property_type, - exclude_old=exclude_old + exclude_old=exclude_old, + heating_system=heating_system ) # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build @@ -906,7 +920,8 @@ class SearchEpc: # We can try and estimate estimated_epc = self.estimate_epc( property_type=self.ordnance_survey_client.property_type, - built_form=self.ordnance_survey_client.built_form + built_form=self.ordnance_survey_client.built_form, + heating_system=self.heating_system ) self.newest_epc = estimated_epc self.older_epcs = [] diff --git a/backend/engine/engine.py b/backend/engine/engine.py index dafcf01e..1a1e75b8 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -3,6 +3,7 @@ import json from copy import deepcopy from datetime import datetime +from sqlalchemy import Nullable from tqdm import tqdm import pandas as pd import numpy as np @@ -59,7 +60,7 @@ from recommendations.recommendation_utils import convert_thickness_to_numeric, g logger = setup_logger() BATCH_SIZE = 5 -SCORING_BATCH_SIZE = 100 +SCORING_BATCH_SIZE = 300 def extract_portfolio_aggregation_data( @@ -373,6 +374,24 @@ def get_funding_data(): return project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes +def parse_heating_system(config): + """ + Helper function to extract a heating system, which can be used to estimate EPC. 
This is a very limited, + placeholder function to cover some initial immediate cases. + :return: + """ + + ll_heating = config.get("landlord_heating_system", None) + if not ll_heating: + return None + + if ll_heating == "electric storage heaters": + # Return with the same format at the EPC + return "Electric storage heaters" + + return None + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) @@ -502,8 +521,8 @@ async def model_engine(body: PlanTriggerRequest): address1 = config.get("domna_full_address", None) address1 = str(int(address1)) if isinstance(address1, float) else str(address1) - full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None + heating_system = parse_heating_system(config) epc_searcher = SearchEpc( address1=address1, @@ -511,7 +530,8 @@ async def model_engine(body: PlanTriggerRequest): uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", - full_address=full_address + full_address=full_address, + heating_system=heating_system ) epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 4ac96f00..417363cd 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -9,6 +9,7 @@ In the future, we will adapt this into a class-based structure to allow for more from copy import deepcopy import pandas as pd +import numpy as np from backend.app.plan.schemas import ( WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES @@ -401,9 +402,7 @@ def optimise_with_funding_paths( # If we have a budget, we need to ensure the subproblem respects it so we remove the fixed cost (which # may already be over budget) and the 
fixed gain (which may not be achievable) - if fixed_gain > target_gain: - picked, sub_cost, sub_gain = ([], 0.0, 0.0) - elif fixed_gain <= target_gain and not sub_measures: + if (fixed_gain > target_gain) or (fixed_gain <= target_gain and not sub_measures): picked, sub_cost, sub_gain = ([], 0.0, 0.0) else: picked, sub_cost, sub_gain = run_optimizer( @@ -412,8 +411,9 @@ def optimise_with_funding_paths( sub_target_gain=target_gain - fixed_gain if target_gain is not None else None ) - if picked is None: - continue + # if picked is None: + # # If we have something in sub_measures, then we have a partial solution, just not enough to + # continue scheme = _path_scheme(path_spec) From fa30ecf1f557179533ff38655261b26316f8f8e6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 7 Nov 2025 23:33:30 +0000 Subject: [PATCH 038/202] handled failture case for descriptions to fuel types and added a note to fix this in the future --- backend/app/assumptions.py | 1 + recommendations/Recommendations.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 37d9164e..0a7daf22 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -79,6 +79,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100 }, "Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1}, + "Ground source heat pump, warm air, electric": {"fueld": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index fa8fe256..4bdcd739 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -688,7 +688,11 @@ class Recommendations: } raise NotImplementedError("Handle this case") - mapped = 
descriptions_to_fuel_types[heating_description] + mapped = descriptions_to_fuel_types.get(heating_description, None) + if mapped is None: + # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over + # fairly regularly. A task has been added to planner to refactor this + mapped = {"fuel": 'Unmapped', "cop": 0.9} heating_fuel = mapped["fuel"] if hotwater_description in [ From 0f2a064f4059727fae4b1fd4352c0914726b4c11 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 7 Nov 2025 23:54:30 +0000 Subject: [PATCH 039/202] fixed fuel type --- backend/app/assumptions.py | 2 +- recommendations/Recommendations.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 0a7daf22..66660e06 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -79,7 +79,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100 }, "Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1}, - "Ground source heat pump, warm air, electric": {"fueld": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Ground source heat pump, warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 4bdcd739..f2dc5804 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -19,9 +19,12 @@ from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.apis.GoogleSolarApi import GoogleSolarApi import backend.app.assumptions as assumptions from backend.app.plan.schemas import SPECIFIC_MEASURES, MEASURE_MAP, NON_INVASIVE_SPECIFIC_MEASURES +from utils.logger import setup_logger STARTING_DUMMY_ID_VALUE = -9999 +logger = setup_logger() + 
class Recommendations: """ @@ -692,7 +695,9 @@ class Recommendations: if mapped is None: # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over # fairly regularly. A task has been added to planner to refactor this + logger.warning("Heating description not mapped: %s", heating_description) mapped = {"fuel": 'Unmapped', "cop": 0.9} + heating_fuel = mapped["fuel"] if hotwater_description in [ From 4151b58dea629f2bd5bc684330494e572bfe9853 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 10 Nov 2025 20:46:57 +0000 Subject: [PATCH 040/202] Fixing boiler upgrade recommendation --- backend/app/plan/router.py | 13 +--- etl/webscrape/Zoopla.py | 92 +++++++++++++++++++++++++-- recommendations/HeatingRecommender.py | 6 +- 3 files changed, 90 insertions(+), 21 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 8c502021..af57e35a 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -43,18 +43,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): logger.error("Failed to parse request body: %s", e) return {"message": "Invalid request"}, 400 - # TODO: Warm up the lambdas here - # from backend.ml_models.api import ModelApi - # model_api = ModelApi( - # portfolio_id=body.portfolio_id, - # timestamp="2020-01-01T00:00:00", - # prediction_buckets=[], - # max_retries=1 - # ) - # await model_api.async_warm_up_lambdas( - # model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES - # ) - # If file_format is domna_asset_list and type is xlsx, read and chunk it if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx": try: @@ -94,6 +82,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): data["scenario_id"] = scenario_id for i in range(total_chunks): + # Create an entry in the request logs table index_start = i * chunk_size index_end = min((i + 1) * chunk_size, total_rows) diff --git a/etl/webscrape/Zoopla.py 
b/etl/webscrape/Zoopla.py index 2c446dc8..4c0443f1 100644 --- a/etl/webscrape/Zoopla.py +++ b/etl/webscrape/Zoopla.py @@ -6,6 +6,8 @@ import random import os from multiprocessing import Pool from tqdm import tqdm +import re +import json ENGINES = ["safari", "chrome"] CACHE_DIR = "zoopla_cache" @@ -17,13 +19,69 @@ def random_delay(): time.sleep(random.uniform(0.5, 2)) +def extract_feature(soup, icon_id): + tag = soup.find("use", href=f"#{icon_id}") + if tag: + parent = tag.find_parent("div", class_="_1pbf8i53") + if parent: + text = parent.get_text(strip=True) + return text + return None + + +def extract_embedded_json(text): + """ + Extract embedded property JSON containing attributes, energy, estimates, and sales history. + """ + # Try to grab everything after "attributes" + match = re.search( + r'"attributes"\s*:\s*\{.*?\}\s*,.*?"historicSales".*?\]', + text, + re.DOTALL + ) + if match: + snippet = "{" + match.group(0) + "}" + snippet = re.sub(r"\\u0022", '"', snippet) + snippet = re.sub(r",(\s*[}\]])", r"\1", snippet) + try: + return json.loads(snippet) + except json.JSONDecodeError: + pass + + # fallback for independent keys + result = {} + for key in [ + "attributes", "energy", "rentEstimate", + "saleEstimate", "saleHistory", "historicSales" + ]: + key_match = re.search(rf'"{key}"\s*:\s*(\{{.*?\}}|\[.*?\])', text, re.DOTALL) + if key_match: + try: + result[key] = json.loads(key_match.group(1)) + except Exception: + pass + return result + + def scrape_all_estimates(session, url): """Scrape valuation estimates for one Zoopla property URL.""" resp = session.get(url, impersonate=random.choice(ENGINES)) + html = resp.text page_source = BeautifulSoup(resp.text, "html.parser") estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) + + data = extract_embedded_json(html) + is_blocked = len(estimates) == 0 - return estimates, is_blocked, resp.text + + return { + "estimates": estimates, + "is_blocked": is_blocked, + "response_html": html, + 
"attributes": data.get("attributes"), + "rent": data.get("rentEstimate"), + "historicSales": data.get("historicSales"), + } def extract_estimates(estimates): @@ -50,19 +108,36 @@ def parallel_task(url): html = open(cache_path, "r").read() page_source = BeautifulSoup(html, "html.parser") estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) + data = extract_embedded_json(html) + history_sales = data.get("historicSales", [{}]) + if len(history_sales) == 0: + history_sales = [{}] + if estimates: low, mid, high = extract_estimates(estimates) - return {"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high} + return { + "URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high, + **data.get("attributes", {}), **data.get("rentEstimate", {}), + **history_sales[0] + } # Otherwise scrape live with StealthSession() as session: attempts = 0 while attempts < 5: - estimates, is_blocked, html = scrape_all_estimates(session, url) - if not is_blocked and estimates: - open(cache_path, "w").write(html) - low, mid, high = extract_estimates(estimates) - return {"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high} + output = scrape_all_estimates(session, url) + if not output["is_blocked"] and output["estimates"]: + open(cache_path, "w").write(output["html"]) + low, mid, high = extract_estimates(output["estimates"]) + history_sales = output.get("historicSales", [{}]) + if len(history_sales) == 0: + history_sales = [{}] + return { + "URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high, + **output.get("attributes", {}), + **output.get("rent", {}), + **history_sales[0] + } attempts += 1 print(f"[Attempt {attempts}] Blocked or empty for {url}") random_delay() @@ -108,6 +183,9 @@ if __name__ == "__main__": ) df = pd.DataFrame(estimates_list) + + print(df.head()) + df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/") df["valuation"] = df["Middle Estimate"].apply(parse_price) 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index b8a1b5a7..d84a47b5 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1257,9 +1257,11 @@ class HeatingRecommender: # If there is not a system change, we add the boiler recommendation at point. self.heating_recommendations.extend([boiler_recommendation]) - if system_change: + if system_change and len(boiler_recommendation): # We combine the heating and controls recommendations, in the case of a system change - # If this is true, we set SAP points to None and survey to False for the boiler recommendation + # If this is true, we set SAP points to None and survey to False for the boiler recommendation. + # We check if we actually have a boiler recommendation as we may not if the heating and hot water + # are already efficient enough combined_recommendations = [] for controls_recommendation in controls_recommender.recommendation: From cce58e0152e526f5637a239af02bae4327e3659b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 10 Nov 2025 22:01:21 +0000 Subject: [PATCH 041/202] added implementation for associated uprn filtering --- backend/SearchEpc.py | 38 +++++++++-- backend/app/db/functions/address_functions.py | 64 +++++++++++++++++++ backend/app/db/models/addresses.py | 34 ++++++++++ backend/engine/engine.py | 15 ++++- 4 files changed, 142 insertions(+), 9 deletions(-) create mode 100644 backend/app/db/functions/address_functions.py create mode 100644 backend/app/db/models/addresses.py diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 82899a81..eb2b0b23 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -156,7 +156,8 @@ class SearchEpc: size=None, property_type=None, fast=False, - heating_system: [str, None] = None + heating_system: [str, None] = None, + associated_uprns: [List[int] | None] = None ): """ Address lines 1 and postcode are mandatory fields. 
The other address lines are optional @@ -172,6 +173,11 @@ class SearchEpc: :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's default :param property_type: str, optional, the property type of the property, if known before hand + :param fast: bool, optional, if true, the extract_epc_data method will skip some processing to return + results faster + :param heating_system: str, optional, the heating system of the property, if known before hand + :param associated_uprns: list of int, optional, list of associated uprns for the property. E.g. other + units in a block of flats """ self.address1 = address1 @@ -180,6 +186,7 @@ class SearchEpc: self.uprn = uprn self.house_number = self.get_house_number(self.address1) self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number) + self.associated_uprns = associated_uprns if associated_uprns is not None else [] # property attributes self.heating_system = heating_system @@ -576,7 +583,8 @@ class SearchEpc: built_form: str = "", property_type: str = "", exclude_old: bool = False, - heating_system: [str, None] = None + heating_system: [str, None] = None, + associated_uprns: [List[int] | None] = None ): """ Fetches and processes EPC data for a given initial postcode, applying successive trimming @@ -597,9 +605,12 @@ class SearchEpc: :param property_type: The 'property-type' value to be used for filtering the EPC data. :param exclude_old: Flag to exclude EPC data older than 10 years. :param heating_system: Optional heating system type for additional filtering. + :param associated_uprns: Optional list of associated UPRNs for additional filtering. 
:return: """ + associated_uprns_to_apply = [] if associated_uprns is None else associated_uprns.copy() + property_type_api_map = { "Bungalow": "bungalow", "Flat": "flat", @@ -701,7 +712,16 @@ class SearchEpc: has_missing_built_form = not estimation_built_form - if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form: + # If we have associated UPRNS, we just filter as such, otherwise + # we filter with built form and property type + if any(str(x) in epc_data["uprn"].astype(str).values for x in associated_uprns_to_apply): + # We check at least one UPRN is in the data + epc_data = epc_data[epc_data["uprn"].isin(associated_uprns_to_apply)] + # After we run this, we empty associated_uprns_to_apply. + # That ensures we don't keep re-applying this filter if we shorten the postcode again + # since we'll keep ending up in the same results + associated_uprns_to_apply = [] + elif is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form: epc_data = epc_data[epc_data["property-type"] == estimation_property_type] else: epc_data = epc_data[ @@ -723,7 +743,10 @@ class SearchEpc: # If loop finishes without a valid response, raise an exception raise Exception("Unable to find postcode data after trimming - investigate me") - def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False, heating_system=None): + def estimate_epc( + self, property_type, built_form, lmks_to_drop=None, exclude_old=False, heating_system=None, + associated_uprns=None + ): """ For a property that does not have an EPC, we retrieve the EPC data for the closest properties and estimate the EPC for the property in question. @@ -739,6 +762,7 @@ class SearchEpc: :param exclude_old: Used to drop any expired EPCs (more than 10 years old) :param heating_system: The heating system of the property we are estimating, if known. 
Will aim to filter EPCs to matching heating systems + :param associated_uprns: List of associated UPRNs for the property. E.g. other units in a block of flats :return: """ @@ -750,7 +774,8 @@ class SearchEpc: built_form=built_form, property_type=property_type, exclude_old=exclude_old, - heating_system=heating_system + heating_system=heating_system, + associated_uprns=associated_uprns ) # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build @@ -921,7 +946,8 @@ class SearchEpc: estimated_epc = self.estimate_epc( property_type=self.ordnance_survey_client.property_type, built_form=self.ordnance_survey_client.built_form, - heating_system=self.heating_system + heating_system=self.heating_system, + associated_uprns=self.associated_uprns ) self.newest_epc = estimated_epc self.older_epcs = [] diff --git a/backend/app/db/functions/address_functions.py b/backend/app/db/functions/address_functions.py new file mode 100644 index 00000000..34dc48c7 --- /dev/null +++ b/backend/app/db/functions/address_functions.py @@ -0,0 +1,64 @@ +from sqlalchemy.orm import Session +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy import func +from backend.app.db.models.addresses import PostcodeSearch +from utils.logger import setup_logger + +logger = setup_logger() + + +def _get_associated_records(results, uprn, uprn_key="UPRN"): + matched_record = [] + for x in results: + if "DPA" in x: + if x["DPA"].get(uprn_key) == str(uprn): + matched_record.append(x["DPA"]) + else: + if x["LPI"].get(uprn_key) == str(uprn): + matched_record.append(x["LPI"]) + + return matched_record + + +def get_associated_uprns(session: Session, postcode: str, uprn: str): + """ + Given a postcode and UPRN, for a remote assessment, fetch all associated UPRNs, based + on parent UPRN. 
This will be properties in the same building + + Parent UPRN is referenced in the following docs: + https://static.geoplace.co.uk/downloads/GeoPlace-Data-Entry-Conventions-Best-Practice-for-Addresses.pdf + + :param session: The database session + :param postcode: The postcode string to search for + :param uprn: The UPRN string to match + :return: The matching PostcodeSearch record, or None if not found + """ + try: + + record = ( + session.query(PostcodeSearch) + .filter(func.upper(PostcodeSearch.postcode) == postcode) + .first() + ) + + matched_record = _get_associated_records(results=record.result_data["results"], uprn=uprn) + + if len(matched_record) != 1: + logger.error("Something went wrong, about to return nothing") + return [] + + if not matched_record[0].get("PARENT_UPRN"): + logger.info("No parent UPRN found, cannot get associated records") + return [] + + associated_records = _get_associated_records( + results=record.result_data["results"], uprn=matched_record[0]["PARENT_UPRN"], uprn_key="PARENT_UPRN" + ) + # We now fetch all UPRNS with the same parent UPRN + associated_uprns = [int(x["UPRN"]) for x in associated_records if x["UPRN"] != str(uprn)] + + return associated_uprns + + except SQLAlchemyError as e: + session.rollback() + raise e diff --git a/backend/app/db/models/addresses.py b/backend/app/db/models/addresses.py new file mode 100644 index 00000000..51e9540f --- /dev/null +++ b/backend/app/db/models/addresses.py @@ -0,0 +1,34 @@ +from sqlalchemy import ( + Column, + Integer, + String, + JSON, + TIMESTAMP, + func, + UniqueConstraint, +) +from sqlalchemy.orm import declarative_base + +Base = declarative_base() + + +class PostcodeSearch(Base): + __tablename__ = "postcode_search" + + id = Column(Integer, primary_key=True, autoincrement=True) + + # Normalized postcode (uppercase, no spaces) + postcode = Column(String, nullable=False, unique=True) + + # Full OS Places API response (stored as JSONB) + result_data = Column(JSON, nullable=False) + + # 
Timestamp for when the entry was first created + created_at = Column(TIMESTAMP(timezone=False), server_default=func.now(), nullable=False) + + __table_args__ = ( + UniqueConstraint("postcode", name="uq_postcode_search_postcode"), + ) + + def __repr__(self): + return f"" diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 1a1e75b8..1cd379b9 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1,9 +1,7 @@ -import ast import json from copy import deepcopy from datetime import datetime -from sqlalchemy import Nullable from tqdm import tqdm import pandas as pd import numpy as np @@ -26,6 +24,8 @@ from backend.app.db.functions.recommendations_functions import ( ) from backend.app.db.functions.funding_functions import upload_funding from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +from backend.app.db.functions.address_functions import get_associated_uprns + from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES from backend.app.plan.utils import ( @@ -524,6 +524,14 @@ async def model_engine(body: PlanTriggerRequest): full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None heating_system = parse_heating_system(config) + associated_uprns = [] + if (body.event_type == "remote_assessment") and config.get("property_type") == "Flat": + # We're running a remote assessment for a flat - we go and grab the associated + # UPRNS for other units in the same building + associated_uprns = get_associated_uprns( + session, postcode=config["postcode"], uprn=uprn + ) + epc_searcher = SearchEpc( address1=address1, postcode=config["postcode"], @@ -531,7 +539,8 @@ async def model_engine(body: PlanTriggerRequest): auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", full_address=full_address, - heating_system=heating_system + heating_system=heating_system, + 
associated_uprns=associated_uprns ) epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) From 0daa1592d7f218d73acab5e02d5b14906f85b6aa Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 11 Nov 2025 14:06:35 +0000 Subject: [PATCH 042/202] added lambda files --- .devcontainer/Dockerfile | 37 +++++++++ .devcontainer/devcontainer.json | 30 +++++++ .devcontainer/docker-compose.yml | 18 ++++ .devcontainer/post-install.sh | 27 ++++++ .../whlg_calculator/docker/.dockerignore | 21 +++++ .../lambda/whlg_calculator/docker/Dockerfile | 25 ++++++ .../lambda/whlg_calculator/docker/app.py | 3 + .../lambda/whlg_calculator/docker/ecr.tf | 63 ++++++++++++++ .../lambda/whlg_calculator/docker/main.tf | 0 .../lambda/whlg_calculator/docker/provider.tf | 15 ++++ deployment/lambda/whlg_calculator/main.tf | 0 deployment/lambda/whlg_calculator/provider.tf | 15 ++++ deployment/lambda/whlg_calculator/vars.tf | 5 ++ .../lambda/whlg_calculator/whlg_lambda.tf | 83 +++++++++++++++++++ 14 files changed, 342 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-compose.yml create mode 100644 .devcontainer/post-install.sh create mode 100644 deployment/lambda/whlg_calculator/docker/.dockerignore create mode 100644 deployment/lambda/whlg_calculator/docker/Dockerfile create mode 100644 deployment/lambda/whlg_calculator/docker/app.py create mode 100644 deployment/lambda/whlg_calculator/docker/ecr.tf create mode 100644 deployment/lambda/whlg_calculator/docker/main.tf create mode 100644 deployment/lambda/whlg_calculator/docker/provider.tf create mode 100644 deployment/lambda/whlg_calculator/main.tf create mode 100644 deployment/lambda/whlg_calculator/provider.tf create mode 100644 deployment/lambda/whlg_calculator/vars.tf create mode 100644 deployment/lambda/whlg_calculator/whlg_lambda.tf diff --git 
a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..4d898973 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,37 @@ +FROM python:3.12-bullseye + +ARG USER=vscode +ARG DEBIAN_FRONTEND=noninteractive + +# 1) Toolchain + utilities for building libpostal +RUN apt-get update && apt-get install -y --no-install-recommends \ + sudo jq vim curl git ca-certificates \ + build-essential pkg-config automake autoconf libtool \ + && rm -rf /var/lib/apt/lists/* + +# 2) Build and install libpostal from source +RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ + && cd /tmp/libpostal \ + && ./bootstrap.sh \ + && ./configure --datadir=/usr/local/share/libpostal \ + && make -j"$(nproc)" \ + && make install \ + && ldconfig \ + && rm -rf /tmp/libpostal + +# 3) Create the user and grant sudo privileges +RUN useradd -m -s /usr/bin/bash ${USER} \ + && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \ + && chmod 0440 /etc/sudoers.d/${USER} + +# 4) Python deps +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +ADD asset_list/requirements.txt requirements.txt +RUN pip install -r requirements.txt + +# 5) Workdir +WORKDIR /workspaces/model + +# 6) Make Python find your package +# Add project root to PYTHONPATH for all processes +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..49bd6f83 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "Basic Python", + "dockerComposeFile": "docker-compose.yml", + "service": "model", + "remoteUser": "vscode", + "workspaceFolder": "/workspaces/model", + "postStartCommand": "bash .devcontainer/post-install.sh", + "mounts": [ + // Optional, just makes getting from Downloads (local env) easier + "source=${localEnv:HOME},target=/workspaces/home,type=bind" + ], + "customizations": { + "vscode": { + "settings": { + 
"files.defaultWorkspace": "/workspaces/model" + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "mechatroner.rainbow-csv", + "ms-toolsai.datawrangler", + "lindacong.vscode-book-reader", + "4ops.terraform", + "fabiospampinato.vscode-todo-plus", + "jgclark.vscode-todo-highlight", + "corentinartaud.pdfpreview" + ] + } + } +} diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 00000000..7f60d34d --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,18 @@ +version: '3.8' + +services: + model: + user: "${UID}:${GID}" + build: + context: .. + dockerfile: .devcontainer/Dockerfile + command: sleep infinity + volumes: + - ..:/workspaces/model + networks: + - model-net + +networks: + model-net: + driver: bridge + diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh new file mode 100644 index 00000000..d9fc3a9e --- /dev/null +++ b/.devcontainer/post-install.sh @@ -0,0 +1,27 @@ +# #!/bin/bash +# poetry install; + +# # Get the Poetry virtual environment path +# VENV_PATH=$(poetry env info --path 2>/dev/null) + +# if [ -z "$VENV_PATH" ]; then +# echo "No Poetry environment found. Did you run 'poetry install'?" +# exit 1 +# fi + +# # Ensure VS Code settings directory exists +# SETTINGS_DIR="/home/vscode/.vscode-server/data/Machine" +# SETTINGS_FILE="$SETTINGS_DIR/settings.json" + +# mkdir -p "$SETTINGS_DIR" + +# # If settings.json doesn't exist, create a default one +# if [ ! 
-f "$SETTINGS_FILE" ]; then +# echo "{}" > "$SETTINGS_FILE" +# fi + +# # Update VS Code settings to use the Poetry virtual environment +# jq --arg venv "$VENV_PATH/bin/python" '.["python.defaultInterpreterPath"] = $venv' \ +# "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE" + +# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH" diff --git a/deployment/lambda/whlg_calculator/docker/.dockerignore b/deployment/lambda/whlg_calculator/docker/.dockerignore new file mode 100644 index 00000000..d587d341 --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/.dockerignore @@ -0,0 +1,21 @@ +# Ignore junk and large files +*.pdf +*.csv +*.xml +*.parquet +*.ipynb +*.mp4 +*.mov +*.jpg +*.png +*.zip +*.tar.gz +__pycache__/ +*.pyc +*.pyo +*.pyd +build/ +dist/ +.etl_cache/ +tests/ +docs/ diff --git a/deployment/lambda/whlg_calculator/docker/Dockerfile b/deployment/lambda/whlg_calculator/docker/Dockerfile new file mode 100644 index 00000000..cdd1f8a3 --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/Dockerfile @@ -0,0 +1,25 @@ +FROM public.ecr.aws/lambda/python:3.12 + +# Install Poetry (you could pin a version if you like) +RUN curl -sSL https://install.python-poetry.org | python3 - + +# Add Poetry to PATH +ENV PATH="/root/.local/bin:$PATH" + +# Set working directory +WORKDIR /var/task + +# Copy Poetry files first to leverage Docker layer caching +COPY pyproject.toml poetry.lock README.md ./ +COPY etl/ etl/ + + +# Install dependencies into /var/task +RUN poetry config virtualenvs.create false \ + && poetry install --only main --no-interaction --no-ansi + +# Copy app code +COPY deployment/lambda/extractor_and_loader/docker/app.py ./ + +# Set Lambda handler +CMD ["app.handler"] \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/app.py b/deployment/lambda/whlg_calculator/docker/app.py new file mode 100644 index 00000000..4dcf1a8e --- /dev/null +++ 
b/deployment/lambda/whlg_calculator/docker/app.py @@ -0,0 +1,3 @@ +def handler(event, context): + print("Hello and welcome to the WHLG Calculator") + print("Please contact the tech team for implementation") \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/ecr.tf b/deployment/lambda/whlg_calculator/docker/ecr.tf new file mode 100644 index 00000000..a1501dff --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/ecr.tf @@ -0,0 +1,63 @@ +# ECR repo +resource "aws_ecr_repository" "whlg_calc_adhoc_ecr" { + name = "whlg_calc_adhoc_ecr" +} + +# ECR policy to allow Lambda access +resource "aws_ecr_repository_policy" "whlg_calc_adhoc_ecr_access" { + repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name + + policy = jsonencode({ + Version = "2008-10-17", + Statement = [{ + Sid = "AllowLambdaPull", + Effect = "Allow", + Principal = { + Service = "lambda.amazonaws.com" + }, + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ] + }] + }) +} + + + +# ECR lifecycle policy: expire untagged images 1 day after push and keep only the 5 most recent "feature"-prefixed tagged images +resource "aws_ecr_lifecycle_policy" "whlg_calc_adhoc_loader_lifecycle" { + repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name + + policy = jsonencode({ + "rules": [ + { + "rulePriority": 2, + "description": "Expire images older than 14 days", + "selection": { + "tagStatus": "untagged", + "countType": "sinceImagePushed", + "countUnit": "days", + "countNumber": 1 + }, + "action": { + "type": "expire" + } + }, + { + "rulePriority": 1, + "description": "Keep last 5 images", + "selection": { + "tagStatus": "tagged", + "tagPrefixList": ["feature"], + "countType": "imageCountMoreThan", + "countNumber": 5 + }, + "action": { + "type": "expire" + } + } + ] + }) +} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/main.tf b/deployment/lambda/whlg_calculator/docker/main.tf new file mode 100644 index 00000000..e69de29b diff --git 
a/deployment/lambda/whlg_calculator/docker/provider.tf b/deployment/lambda/whlg_calculator/docker/provider.tf new file mode 100644 index 00000000..5f0fef0f --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/provider.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6.3.0" + } + } + backend "s3" { + bucket = "whlg-calc-tf-state" + region = "eu-west-2" + key = "env:/dev/lambda/ecr/whlg-calc.tfstate" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/lambda/whlg_calculator/main.tf b/deployment/lambda/whlg_calculator/main.tf new file mode 100644 index 00000000..e69de29b diff --git a/deployment/lambda/whlg_calculator/provider.tf b/deployment/lambda/whlg_calculator/provider.tf new file mode 100644 index 00000000..df9abf1c --- /dev/null +++ b/deployment/lambda/whlg_calculator/provider.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6.3.0" + } + } + backend "s3" { + bucket = "whlg-calc-tf-state" + region = "eu-west-2" + key = "env:/dev/lambda/eachlambda/whlg_calc_lambda.tfstate" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/lambda/whlg_calculator/vars.tf b/deployment/lambda/whlg_calculator/vars.tf new file mode 100644 index 00000000..ecdf359d --- /dev/null +++ b/deployment/lambda/whlg_calculator/vars.tf @@ -0,0 +1,5 @@ +variable "lambda_image_tag" { + description = "Docker image tag (e.g. 
GitHub SHA)" + type = string + default = "local-dev-latest" +} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/whlg_lambda.tf b/deployment/lambda/whlg_calculator/whlg_lambda.tf new file mode 100644 index 00000000..0a5433a9 --- /dev/null +++ b/deployment/lambda/whlg_calculator/whlg_lambda.tf @@ -0,0 +1,83 @@ +# Reference existing IAM role +data "aws_iam_role" "lambda_exec_role" { + name = "lambda-exec-role" +} + +# Reference existing ECR repository +data "aws_ecr_repository" "whlg_calc_adhoc_ecr" { + name = "whlg_calc_adhoc_ecr" +} + +# SQS queue +resource "aws_sqs_queue" "whlg_calc_adhoc_queue" { + name = "whlg_calc_adhoc-queue" + visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) +} + + +# Custom IAM policy specific to the whlg_calc_adhoc Lambda +resource "aws_iam_policy" "whlg_calc_adhoc_policy" { + name = "walthamforest_adhoc_policy_lambda" + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" + ], + Resource = aws_sqs_queue.whlg_calc_adhoc_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.whlg_calc_adhoc_ecr.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "whlg_calc_adhoc_policy_attach" { + role = data.aws_iam_role.lambda_exec_role.name + policy_arn = aws_iam_policy.whlg_calc_adhoc_policy.arn +} + +# Lambda function +resource "aws_lambda_function" "whlg_calc_adhoc" { + function_name = "whlg_calc_adhoc" + role = data.aws_iam_role.lambda_exec_role.arn + package_type = "Image" + image_uri = "${data.aws_ecr_repository.whlg_calc_adhoc_ecr.repository_url}:${var.lambda_image_tag}" + # Increase timeout (max 900 sec 
/ 15 min) + # timeout = 300 # e.g. 5 minutes + + # Increase memory (default 128 MB) + memory_size = 2048 # try 1024 or 2048 MB to start + + # environment { + # variables = { + # DATABASE_URL = "postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB" + # } + # } +} + +# SQS trigger +resource "aws_lambda_event_source_mapping" "whlg_calc_adhoc_trigger" { + event_source_arn = aws_sqs_queue.whlg_calc_adhoc_queue.arn + function_name = aws_lambda_function.whlg_calc_adhoc.arn + batch_size = 1 +} From 91db4fb86c24e1a76fcdfe9af002011794644a7c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 11 Nov 2025 14:29:03 +0000 Subject: [PATCH 043/202] lambda --- .../actions/lambda-deploy/action.yml | 86 +++++++++++++++++++ .../actions/terraform-deploy/action.yml | 55 ++++++++++++ .github/workflows/lambda_main.yml | 33 +++++++ 3 files changed, 174 insertions(+) create mode 100644 .github/workflows/actions/lambda-deploy/action.yml create mode 100644 .github/workflows/actions/terraform-deploy/action.yml create mode 100644 .github/workflows/lambda_main.yml diff --git a/.github/workflows/actions/lambda-deploy/action.yml b/.github/workflows/actions/lambda-deploy/action.yml new file mode 100644 index 00000000..3ca0fc8d --- /dev/null +++ b/.github/workflows/actions/lambda-deploy/action.yml @@ -0,0 +1,86 @@ +name: "Build and Push Lambda Image to ECR" +description: "Reusable action for building and pushing lambda Docker image to ECR" + +inputs: + ecr_name: + description: "Lambda name / ECR repo name" + required: true + dockerfile_path: + description: "Path to Dockerfile" + required: true + ecr_tf_dir: + description: "Path to ECR terraform directory" + required: true + lambda_tf_dir: + description: "Path to Lambda terraform directory" + required: true + aws-access-key-id: + description: "AWS access key" + required: true + aws-secret-access-key: + description: "AWS secret key" + required: true + aws-region: + 
description: "AWS region" + required: true + git-sha: + description: "Git commit SHA" + required: true + git-ref: + description: "Git ref name" + required: true + +runs: + using: "composite" + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Log in to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Deploy ECR + uses: ./.github/workflows/actions/terraform-deploy + with: + working_directory: ${{ inputs.ecr_tf_dir }} + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + - name: Set Docker image tag + id: set_tag + shell: bash + run: | + SHORT_SHA=$(echo "${{ inputs.git-sha }}" | cut -c1-7) + BRANCH=$(echo "${{ inputs.git-ref }}" | tr '/' '-') + TAG="${BRANCH}-${SHORT_SHA}" + echo "IMAGE_TAG=${TAG}" >> $GITHUB_ENV + echo "tag=$TAG" >> $GITHUB_OUTPUT + + - name: Build and push Docker image + shell: bash + run: | + IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${{ steps.set_tag.outputs.tag }} + echo "Building Docker image for ${{ inputs.ecr_name }}..." + docker build -t $IMAGE_URI -f ${{ inputs.dockerfile_path }} . + + echo "Pushing to ECR..." 
+ docker push $IMAGE_URI + + - name: Deploy Lambda + uses: ./.github/workflows/actions/terraform-deploy + with: + working_directory: ${{ inputs.lambda_tf_dir }} + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + lambda-image-tag: ${{ steps.set_tag.outputs.tag }} + + + diff --git a/.github/workflows/actions/terraform-deploy/action.yml b/.github/workflows/actions/terraform-deploy/action.yml new file mode 100644 index 00000000..56133299 --- /dev/null +++ b/.github/workflows/actions/terraform-deploy/action.yml @@ -0,0 +1,55 @@ +name: "Terraform Plan Shared Config" +description: "Plans shared Terraform config for Lambdas" + +inputs: + working_directory: + description: "Directory containing Terraform config" + required: true + aws-access-key-id: + description: "AWS access key" + required: true + aws-secret-access-key: + description: "AWS secret key" + required: true + aws-region: + description: "AWS region" + required: true + lambda-image-tag: + description: "Tag of the Lambda image (e.g., GitHub SHA)" + required: false + +runs: + using: "composite" + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Terraform Init + working-directory: ${{ inputs.working_directory }} + shell: bash + run: terraform init -reconfigure + + - name: Terraform Plan + working-directory: ${{ inputs.working_directory }} + shell: bash + run: | + if [ -n "${{ inputs.lambda-image-tag }}" ]; then + terraform plan -out=tfplan -var="lambda_image_tag=${{ inputs.lambda-image-tag }}" + else + terraform plan -out=tfplan + fi + + - name: Terraform Apply + working-directory: ${{ 
inputs.working_directory }} + shell: bash + run: terraform apply -auto-approve tfplan + diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml new file mode 100644 index 00000000..73645ac5 --- /dev/null +++ b/.github/workflows/lambda_main.yml @@ -0,0 +1,33 @@ +# Please note: this GitHub workflow assumes that shared-terraform is already deployed in the AWS environment. +# The shared-terraform files live in https://github.com/Hestia-Homes/survey-extraction/tree/main/deployment/lambda/lambda_shared + +name: Deploy Lambdas +on: + push: + branches: [main, feautre/whlg_lambda] + +env: + AWS_REGION: eu-west-2 + +jobs: + whlg-calc: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Build and deploy Warm Homes Local Grant Calc (whlg-calc) + uses: ./.github/workflows/actions/lambda-deploy + with: + ecr_name: whlg_calc_adhoc_ecr + dockerfile_path: ./deployment/lambda/whlg_calculator/docker/Dockerfile + ecr_tf_dir: ./deployment/lambda/whlg_calculator/docker/ + lambda_tf_dir: ./deployment/lambda/whlg_calculator/ + aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY}} + aws-region: eu-west-2 + git-sha: ${{ github.sha }} + git-ref: ${{ github.ref_name }} \ No newline at end of file From f10473cbf3952ac76ab3e30b977aa290bb2b20ff Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 11 Nov 2025 14:30:11 +0000 Subject: [PATCH 044/202] wrong branch --- .github/workflows/lambda_main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml index 73645ac5..960adbe5 100644 --- a/.github/workflows/lambda_main.yml +++ b/.github/workflows/lambda_main.yml @@ -4,7 +4,7 @@ name: Deploy Lambdas on: push: - branches: [main, feautre/whlg_lambda] + branches: [main, feature/whlg_lambda] env: AWS_REGION: eu-west-2 From 
91276919be422224e865d6c2a3121a7cb79080ad Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 13 Nov 2025 13:50:38 +0000 Subject: [PATCH 045/202] Adding template for new routes --- backend/app/whlg/__init__.py | 0 backend/app/whlg/route.py | 47 ++ backend/app/whlg/schema.py | 0 backend/tests/test_integration.py | 1061 ++++++++++++++--------------- 4 files changed, 577 insertions(+), 531 deletions(-) create mode 100644 backend/app/whlg/__init__.py create mode 100644 backend/app/whlg/route.py create mode 100644 backend/app/whlg/schema.py diff --git a/backend/app/whlg/__init__.py b/backend/app/whlg/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/whlg/route.py b/backend/app/whlg/route.py new file mode 100644 index 00000000..21d417c5 --- /dev/null +++ b/backend/app/whlg/route.py @@ -0,0 +1,47 @@ +import boto3 +import json +import math +import asyncio +import random + +from datetime import datetime + +from fastapi import APIRouter, Depends +from backend.app.dependencies import validate_token +from backend.app.plan.schemas import PlanTriggerRequest +from backend.app.config import get_settings +from sqlalchemy.orm import sessionmaker +from utils.logger import setup_logger +from backend.app.db.connection import db_engine + +from backend.app.db.functions.recommendations_functions import create_scenario + +logger = setup_logger() + +router = APIRouter( + prefix="/whlg", + tags=["whlg"], + dependencies=[Depends(validate_token)], + responses={404: {"description": "Not found"}} +) + + +@router.post("/") +async def whlg_entrypoint(body): + # body needs to include postcode, UPRN [task ID?] + # + # Refer to the plan trigger route for code + # 1) Create an event schema and store it in the schemas file + # 2) Build the tasks functions + # 3) Read in the funding CSV. 
This can be found as such: + # whlg_eligible_postcodes = read_csv_from_s3( + # bucket_name=get_settings().DATA_BUCKET, + # filepath="funding/whlg eligible postcodes.csv", + # ) + # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + # Check the postcode against this file + # TODO: We need to store this somewhere - create a new table! + # Update subtask to be complete + # Once this is complete, build the logs stuff, add the cloudwatch logs ID to the database + + print("We're gonna do stuff!") diff --git a/backend/app/whlg/schema.py b/backend/app/whlg/schema.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index 60778132..1ba80223 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -1,532 +1,531 @@ -# import ast -# import json -# from copy import deepcopy -# from dataclasses import replace -# from datetime import datetime -# -# import random -# from tqdm import tqdm -# import pandas as pd -# import numpy as np -# from etl.epc.Record import EPCRecord -# from backend.SearchEpc import SearchEpc -# from sqlalchemy.exc import IntegrityError, OperationalError -# from sqlalchemy.orm import sessionmaker -# from starlette.responses import Response -# -# from backend.app.config import get_settings, get_prediction_buckets -# from backend.app.db.connection import db_engine -# from backend.app.db.functions.materials_functions import get_materials -# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -# from backend.app.db.functions.property_functions import ( -# create_property, create_property_details_epc, create_property_targets, update_property_data, -# update_or_create_property_spatial_details -# ) -# from backend.app.db.functions.recommendations_functions import ( -# create_plan, upload_recommendations, create_scenario -# ) -# from 
backend.app.db.functions.funding_functions import upload_funding -# from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -# from backend.app.db.models.portfolio import rating_lookup -# from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -# from backend.app.plan.utils import get_cleaned -# from backend.app.utils import sap_to_epc -# import backend.app.assumptions as assumptions -# -# from backend.ml_models.api import ModelApi -# from backend.Property import Property -# from backend.apis.GoogleSolarApi import GoogleSolarApi -# -# from recommendations.optimiser.CostOptimiser import CostOptimiser -# from recommendations.optimiser.GainOptimiser import GainOptimiser -# import recommendations.optimiser.optimiser_functions as optimiser_functions -# from recommendations.Recommendations import Recommendations -# from utils.logger import setup_logger -# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 -# from backend.ml_models.Valuation import PropertyValuation -# -# from etl.bill_savings.KwhData import KwhData -# from etl.spatial.OpenUprnClient import OpenUprnClient -# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc -# -# from backend.Funding import Funding -# from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths -# from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value -# -# # Input data (temp) -# import pickle -# -# import pandas as pd -# -# with open("local_data_for_deletion.pkl", 'rb') as f: -# local_data = pickle.load(f) -# -# cleaning_data = local_data["cleaning_data"] -# materials = local_data["materials"] -# cleaned = local_data["cleaned"] -# project_scores_matrix = local_data["project_scores_matrix"] -# partial_project_scores_matrix = local_data["partial_project_scores_matrix"] -# whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] 
-# -# with open("kwh_client_for_deletion.pkl", "rb") as f: -# kwh_client = pickle.load(f) -# -# epc_data = pd.read_csv( -# "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates -# .csv", -# low_memory=False -# ) -# -# # TODO: Store this for cleaning -# costs_by_floor_area = epc_data[ -# pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01" -# ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", -# "HOT_WATER_COST_CURRENT"]].copy() -# -# costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] -# for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: -# costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] -# -# costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ -# ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] -# ].mean().reset_index() -# -# sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( -# 1000).reset_index(drop=True) -# -# # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type -# # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used -# # in the google solar api but is it really needed? I don't think it's super accurate. It might be better to -# # just use an average energy consumption by floor area for UK households? 
-# # Load the input properties -# input_properties = [] -# for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): -# epc = { -# k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() -# } -# # Avoid the data load inside of EPCRecord - something we should pull out -# for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: -# if pd.isnull(epc[x]): -# if x == "floor-height": -# epc[x] = 2.4 -# if x == "number-habitable-rooms": -# epc[x] = 3 -# if x == "number-heated-rooms": -# epc[x] = 3 -# -# epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} -# -# prepared_epc = EPCRecord( -# epc_records=epc_records, -# run_mode="newdata", -# cleaning_data=cleaning_data, -# ) -# -# input_properties.append( -# Property( -# id=row_id, -# is_new=True, -# address=epc["address"], -# postcode=epc["postcode"], -# epc_record=prepared_epc, -# already_installed={}, -# property_valuation={}, -# non_invasive_recommendations=[], -# energy_assessment=None, -# **Property.extract_kwargs(config), # TODO: Depraecate this -# ) -# ) -# -# # For each property, insert the default solar configuration -# for p in tqdm(input_properties): -# solar_api = GoogleSolarApi( -# api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 -# ) -# panel_performance = solar_api.default_panel_performance(property_instance=p) -# p.set_solar_panel_configuration( -# solar_panel_configuration={ -# "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 -# }, -# ) -# -# # We mock kwh preds -# mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} -# for p in tqdm(input_properties): -# mocked_kwh_predictions["heating_kwh_predictions"].append({ -# "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] -# }) -# mocked_kwh_predictions["hotwater_kwh_predictions"].append({ -# "id": p.uprn, "predictions": 
random.sample(range(100, 3000), 1)[0] -# }) -# mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) -# mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) -# -# # TODO: We might want to implement this generally, via an ETL process +import ast +import json +from copy import deepcopy +from dataclasses import replace +from datetime import datetime + +import random +from tqdm import tqdm +import pandas as pd +import numpy as np +from etl.epc.Record import EPCRecord +from backend.SearchEpc import SearchEpc +from sqlalchemy.exc import IntegrityError, OperationalError +from sqlalchemy.orm import sessionmaker +from starlette.responses import Response + +from backend.app.config import get_settings, get_prediction_buckets +from backend.app.db.connection import db_engine +from backend.app.db.functions.materials_functions import get_materials +from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations +from backend.app.db.functions.property_functions import ( + create_property, create_property_details_epc, create_property_targets, update_property_data, + update_or_create_property_spatial_details +) +from backend.app.db.functions.recommendations_functions import ( + create_plan, upload_recommendations, create_scenario +) +from backend.app.db.functions.funding_functions import upload_funding +from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +from backend.app.db.models.portfolio import rating_lookup +from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES +from backend.app.plan.utils import get_cleaned +from backend.app.utils import sap_to_epc +import backend.app.assumptions as assumptions + +from backend.ml_models.api import ModelApi +from backend.Property import Property +from backend.apis.GoogleSolarApi import 
GoogleSolarApi + +from recommendations.optimiser.CostOptimiser import CostOptimiser +from recommendations.optimiser.GainOptimiser import GainOptimiser +import recommendations.optimiser.optimiser_functions as optimiser_functions +from recommendations.Recommendations import Recommendations +from utils.logger import setup_logger +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 +from backend.ml_models.Valuation import PropertyValuation + +from etl.bill_savings.KwhData import KwhData +from etl.spatial.OpenUprnClient import OpenUprnClient +from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc + +from backend.Funding import Funding +from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths +from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value + +# Input data (temp) +import pickle + +import pandas as pd + +with open("local_data_for_deletion.pkl", 'rb') as f: + local_data = pickle.load(f) + +cleaning_data = local_data["cleaning_data"] +materials = local_data["materials"] +cleaned = local_data["cleaned"] +project_scores_matrix = local_data["project_scores_matrix"] +partial_project_scores_matrix = local_data["partial_project_scores_matrix"] +whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] + +with open("kwh_client_for_deletion.pkl", "rb") as f: + kwh_client = pickle.load(f) + +epc_data = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/domestic-E06000002-Middlesbrough/certificates.csv", + low_memory=False +) + +# TODO: Store this for cleaning +costs_by_floor_area = epc_data[ + pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01" + ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", + "HOT_WATER_COST_CURRENT"]].copy() + +costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] +for c in ["lighting-cost-current", "heating-cost-current", 
"hot-water-cost-current"]: + costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] + +costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ + ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] +].mean().reset_index() + +sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( + 10000).reset_index(drop=True) + +# TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type +# TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used +# in the google solar api but is it really needed? I don't think it's super accurate. It might be better to +# just use an average energy consumption by floor area for UK households? +# Load the input properties +input_properties = [] +for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): + epc = { + k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() + } + # Avoid the data load inside of EPCRecord - something we should pull out + for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: + if pd.isnull(epc[x]): + if x == "floor-height": + epc[x] = 2.4 + if x == "number-habitable-rooms": + epc[x] = 3 + if x == "number-heated-rooms": + epc[x] = 3 + + epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} + + prepared_epc = EPCRecord( + epc_records=epc_records, + run_mode="newdata", + cleaning_data=cleaning_data, + ) + + input_properties.append( + Property( + id=row_id, + is_new=True, + address=epc["address"], + postcode=epc["postcode"], + epc_record=prepared_epc, + already_installed={}, + property_valuation={}, + non_invasive_recommendations=[], + energy_assessment=None, + **Property.extract_kwargs(config), # TODO: Depraecate this + ) + ) + +# For each property, insert the 
default solar configuration +for p in tqdm(input_properties): + solar_api = GoogleSolarApi( + api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 + ) + panel_performance = solar_api.default_panel_performance(property_instance=p) + p.set_solar_panel_configuration( + solar_panel_configuration={ + "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 + }, + ) + +# We mock kwh preds +mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} +for p in tqdm(input_properties): + mocked_kwh_predictions["heating_kwh_predictions"].append({ + "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] + }) + mocked_kwh_predictions["hotwater_kwh_predictions"].append({ + "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] + }) +mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) +mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) + +# TODO: We might want to implement this generally, via an ETL process +for p in input_properties: + for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: + if pd.isnull(p.data[col]): + min_diff = abs( + (costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"]) + ).min() + df = costs_by_floor_area[ + abs((costs_by_floor_area["current-energy-efficiency"] - p.data[ + "current-energy-efficiency"])) == min_diff + ] + if df.shape[0] > 1: + df = df.head(1) + p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0] + +[ + p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in + input_properties +] # for p in input_properties: -# for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: -# if pd.isnull(p.data[col]): -# min_diff = abs( -# 
(costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"]) -# ).min() -# df = costs_by_floor_area[ -# abs((costs_by_floor_area["current-energy-efficiency"] - p.data[ -# "current-energy-efficiency"])) == min_diff -# ] -# if df.shape[0] > 1: -# df = df.head(1) -# p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0] -# -# [ -# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in -# input_properties -# ] -# # for p in input_properties: -# # p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) -# -# # Run the recommendations -# recommendations = {} -# recommendations_scoring_data = [] -# representative_recommendations = {} -# for p in tqdm(input_properties): -# if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]): -# p.data["built-form"] = "Semi-Detached" -# recommender = Recommendations( -# property_instance=p, -# materials=materials, -# exclusions=[], -# inclusions=[], -# default_u_values=True -# ) -# property_recommendations, property_representative_recommendations = recommender.recommend() -# -# if not property_recommendations: -# continue -# -# recommendations[p.id] = property_recommendations -# representative_recommendations[p.id] = property_representative_recommendations -# -# p.create_base_difference_epc_record(cleaned_lookup=cleaned) -# p.adjust_difference_record_with_recommendations( -# property_recommendations, property_representative_recommendations -# ) -# -# recommendations_scoring_data.extend(p.recommendations_scoring_data) -# -# recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) -# recommendations_scoring_data = recommendations_scoring_data.drop( -# columns=[ -# "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", -# "carbon_ending" -# ] -# ) -# -# model_predictions_mocked = { -# "sap_change_predictions": None, -# 
"heat_demand_predictions": None, -# "carbon_change_predictions": None, -# "heating_kwh_predictions": None, -# "hotwater_kwh_predictions": None, -# } -# -# for k in model_predictions_mocked.keys(): -# model_predictions_mocked[k] = recommendations_scoring_data[["id"]].copy() -# model_predictions_mocked[k][['property_id', 'recommendation_id']] = ( -# model_predictions_mocked[k]['id'].str.split('+', expand=True) -# ) -# model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply( -# ModelApi.extract_phase) -# -# if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]: -# model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000), -# k=len(recommendations_scoring_data)) -# continue -# -# model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True) -# preds = [] -# for p_id in model_predictions_mocked[k]["property_id"].unique(): -# # We add some amount each time -# p = [p for p in input_properties if str(p.id) == p_id][0] -# if k == "sap_change_predictions": -# start = p.data["current-energy-efficiency"] -# elif k == "heat_demand_predictions": -# start = p.data["energy-consumption-current"] -# else: -# start = p.data["co2-emissions-current"] -# df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy() -# # Add some amount each time -# to_add = random.choices(range(0, 15), k=len(df)) -# to_add = np.cumsum(to_add) -# df["predictions"] = start + to_add -# preds.append(df) -# preds = pd.concat(preds) -# model_predictions_mocked[k] = preds -# -# for property_id in tqdm(recommendations.keys(), total=len(recommendations)): -# property_instance = [p for p in input_properties if p.id == property_id][0] -# -# recommendations_with_impact, impact_summary = ( -# Recommendations.calculate_recommendation_impact( -# property_instance=property_instance, -# all_predictions=model_predictions_mocked, -# recommendations=recommendations, -# 
representative_recommendations=representative_recommendations -# ) -# ) -# -# # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc -# # at each phase -# property_instance.update_simulation_epcs(impact_summary) -# recommendations[property_id] = recommendations_with_impact -# -# for property_id in tqdm([p.id for p in input_properties]): -# property_recommendations = recommendations.get(property_id, []) -# property_instance = [p for p in input_properties if p.id == property_id][0] -# -# property_current_energy_bill = ( -# Recommendations.calculate_recommendation_tenant_savings( -# property_instance=property_instance, -# kwh_simulation_predictions=model_predictions_mocked, -# property_recommendations=property_recommendations, -# ashp_cop=2.8 -# ) -# ) -# property_instance.current_energy_bill = property_current_energy_bill -# -# body = PlanTriggerRequest( -# **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0, -# 'trigger_file_path': '', 'already_installed_file_path': '', -# 'patches_file_path': None, 'non_invasive_recommendations_file_path': None, -# 'valuation_file_path': '', -# 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None, -# 'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8, -# 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None, -# 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} -# ) -# -# for p in tqdm(input_properties): -# if not recommendations.get(p.id): -# continue -# -# # we need to double unlist because we have a list of lists -# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} -# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] -# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in 
body.required_measures] -# -# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore -# # its inclusion -# needs_ventilation = any( -# x in property_measure_types for x in assumptions.measures_needing_ventilation -# ) and not p.has_ventilation -# -# if not measures_to_optimise: -# # Nothing to do, we just reshape the recommendations -# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( -# p.id, recommendations, set() -# ) -# continue -# -# fixed_gain = optimiser_functions.calculate_fixed_gain( -# property_required_measures, recommendations, p, needs_ventilation -# ) -# gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) -# -# funding = Funding( -# tenure="Social", -# project_scores_matrix=project_scores_matrix, -# partial_project_scores_matrix=partial_project_scores_matrix, -# whlg_eligible_postcodes=whlg_eligible_postcodes, -# eco4_social_cavity_abs_rate=12.5, -# eco4_social_solid_abs_rate=17, -# eco4_private_cavity_abs_rate=12.5, -# eco4_private_solid_abs_rate=17, -# gbis_social_cavity_abs_rate=21, -# gbis_social_solid_abs_rate=25, -# gbis_private_cavity_abs_rate=21, -# gbis_private_solid_abs_rate=28, -# ) -# -# li_thickness = convert_thickness_to_numeric( -# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] -# ) -# current_wall_u_value = p.walls["thermal_transmittance"] -# if current_wall_u_value is None: -# current_wall_u_value = get_wall_u_value( -# clean_description=p.walls["clean_description"], -# age_band=p.age_band, -# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], -# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], -# ) -# -# # We insert the innovation uplift -# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) -# -# # TODO: Turn this into a function and store the innovaiton uplift -# for group in measures_to_optimise_with_uplift: -# for r in group: -# -# if r["type"] in 
["mechanical_ventilation", "low_energy_lighting", "secondary_heating", -# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: -# ( -# r["partial_project_score"], -# r["partial_project_funding"], -# r["innovation_uplift"], -# r["uplift_project_score"], -# ) = ( -# 0, 0, 0, 0 -# ) -# continue -# -# ( -# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], -# r["uplift_project_score"] -# ) = funding.get_innovation_uplift( -# measure=r, -# starting_sap=p.data["current-energy-efficiency"], -# floor_area=p.floor_area, -# is_cavity=p.walls["is_cavity_wall"], -# current_wall_uvalue=current_wall_u_value, -# is_partial="partial" in p.walls["clean_description"].lower(), -# existing_li_thickness=li_thickness, -# mainheating=p.main_heating, -# main_fuel=p.main_fuel, -# mainheat_energy_eff=p.data["mainheat-energy-eff"], -# ) -# -# input_measures = optimiser_functions.prepare_input_measures( -# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True -# ) -# -# # When the goal is Increasing EPC, we can run the funding optimiser -# if body.goal == "Increasing EPC": -# -# solutions = optimise_with_funding_paths( -# p=p, -# input_measures=input_measures, -# housing_type=body.housing_type, -# budget=body.budget, -# target_gain=gain, -# funding=funding -# ) -# -# # Given the solutions we select the optimal one -# solutions["cost_less_full_project_funding"] = np.where( -# solutions["scheme"] == "eco4", -# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], -# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] -# ) -# -# solutions["cost_less_full_project_funding"] = ( -# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] -# ) -# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) -# -# if solutions["meets_upgrade_target"].any(): -# # If we have a solution that 
meets the upgrade target, we select that one -# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] -# else: -# # Pick the cheapest -# optimal_solution = solutions.iloc[0] -# -# # This is the list of measures that we will recommend -# scheme = optimal_solution["scheme"] -# funded_measures = optimal_solution["items"] if scheme != "none" else [] -# solution = optimal_solution["items"] + optimal_solution["unfunded_items"] -# # This is the total amount of funding that the project will produce (including uplifts) (£) -# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ -# optimal_solution["partial_project_funding"] -# # This is the total amount of funding associated to the uplift (£) -# total_uplift = optimal_solution["total_uplift"] -# # This is the funding scheme selected -# # This is the full project ABS -# full_project_score = optimal_solution["project_score"] -# # This is the partial project ABS -# partial_project_score = optimal_solution["partial_project_score"] -# # This is the uplift score ABS -# uplift_project_score = optimal_solution["total_uplift_score"] -# else: -# # We optimise and then we determine eligibility for funding, based on the measures selected -# optimiser = ( -# GainOptimiser( -# input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False -# ) if body.budget else CostOptimiser(input_measures, min_gain=gain) -# ) -# optimiser.setup() -# optimiser.solve() -# solution = optimiser.solution -# -# recommendation_types = [] -# for measures in input_measures: -# for measure in measures: -# recommendation_types.append(measure["type"]) -# recommendation_types = set(recommendation_types) -# -# has_wall_insulation_recommendation = any( -# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in -# WALL_INSULATION_MEASURES -# ) -# has_roof_insulation_recommendation = any( -# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in -# 
ROOF_INSULATION_MEASURES -# ) -# -# funding.check_funding( -# measures=solution, -# starting_sap=p.data["current-energy-efficiency"], -# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), -# floor_area=p.floor_area, -# mainheat_description=p.main_heating["clean_description"], -# heating_control_description=p.main_heating_controls["clean_description"], -# is_cavity=p.walls["is_cavity_wall"], -# current_wall_uvalue=current_wall_u_value, -# is_partial="partial" in p.walls["clean_description"].lower(), -# existing_li_thickness=li_thickness, -# mainheating=p.main_heating, -# main_fuel=p.main_fuel, -# mainheat_energy_eff=p.data["mainheat-energy-eff"], -# has_wall_insulation_recommendation=has_wall_insulation_recommendation, -# has_roof_insulation_recommendation=has_roof_insulation_recommendation, -# ) -# -# # Determine the scheme -# scheme = "none" -# if funding.eco4_eligible: -# scheme = "eco4" -# if scheme == "none" and funding.gbis_eligible: -# scheme = "gbis" -# -# funded_measures = solution if scheme in ["gbis", "eco4"] else [] -# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs -# total_uplift = funding.eco4_uplift -# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs -# partial_project_score = funding.partial_project_abs -# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift -# -# selected = {r["id"] for r in solution} -# -# if property_required_measures: -# solution = optimiser_functions.add_required_measures( -# property_id=p.id, property_required_measures=property_required_measures, -# recommendations=recommendations, selected=selected, -# ) -# -# # Add best practice measures (ventilation/trickle vents) -# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) -# # Final flattening - Don't do this! 
-# # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( -# # p.id, recommendations, selected -# # ) -# -# # TODO: functionise -# for measure in funded_measures: -# if "+mechanical_ventilation" in measure["type"]: -# measure["type"] = measure["type"].split("+mechanical_ventilation")[0] -# -# p.insert_funding( -# scheme=scheme, -# funded_measures=funded_measures, -# project_funding=project_funding, -# total_uplift=total_uplift, -# full_project_score=full_project_score, -# partial_project_score=partial_project_score, -# uplift_project_score=uplift_project_score -# ) +# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) + +# Run the recommendations +recommendations = {} +recommendations_scoring_data = [] +representative_recommendations = {} +for p in tqdm(input_properties): + if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]): + p.data["built-form"] = "Semi-Detached" + recommender = Recommendations( + property_instance=p, + materials=materials, + exclusions=[], + inclusions=[], + default_u_values=True + ) + property_recommendations, property_representative_recommendations = recommender.recommend() + + if not property_recommendations: + continue + + recommendations[p.id] = property_recommendations + representative_recommendations[p.id] = property_representative_recommendations + + p.create_base_difference_epc_record(cleaned_lookup=cleaned) + p.adjust_difference_record_with_recommendations( + property_recommendations, property_representative_recommendations + ) + + recommendations_scoring_data.extend(p.recommendations_scoring_data) + +recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) +recommendations_scoring_data = recommendations_scoring_data.drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] +) + +model_predictions_mocked = { + "sap_change_predictions": None, + 
"heat_demand_predictions": None, + "carbon_change_predictions": None, + "heating_kwh_predictions": None, + "hotwater_kwh_predictions": None, +} + +for k in model_predictions_mocked.keys(): + model_predictions_mocked[k] = recommendations_scoring_data[["id"]].copy() + model_predictions_mocked[k][['property_id', 'recommendation_id']] = ( + model_predictions_mocked[k]['id'].str.split('+', expand=True) + ) + model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply( + ModelApi.extract_phase) + + if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]: + model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000), + k=len(recommendations_scoring_data)) + continue + + model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True) + preds = [] + for p_id in model_predictions_mocked[k]["property_id"].unique(): + # We add some amount each time + p = [p for p in input_properties if str(p.id) == p_id][0] + if k == "sap_change_predictions": + start = p.data["current-energy-efficiency"] + elif k == "heat_demand_predictions": + start = p.data["energy-consumption-current"] + else: + start = p.data["co2-emissions-current"] + df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy() + # Add some amount each time + to_add = random.choices(range(0, 15), k=len(df)) + to_add = np.cumsum(to_add) + df["predictions"] = start + to_add + preds.append(df) + preds = pd.concat(preds) + model_predictions_mocked[k] = preds + +for property_id in tqdm(recommendations.keys(), total=len(recommendations)): + property_instance = [p for p in input_properties if p.id == property_id][0] + + recommendations_with_impact, impact_summary = ( + Recommendations.calculate_recommendation_impact( + property_instance=property_instance, + all_predictions=model_predictions_mocked, + recommendations=recommendations, + 
representative_recommendations=representative_recommendations + ) + ) + + # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc + # at each phase + property_instance.update_simulation_epcs(impact_summary) + recommendations[property_id] = recommendations_with_impact + +for property_id in tqdm([p.id for p in input_properties]): + property_recommendations = recommendations.get(property_id, []) + property_instance = [p for p in input_properties if p.id == property_id][0] + + property_current_energy_bill = ( + Recommendations.calculate_recommendation_tenant_savings( + property_instance=property_instance, + kwh_simulation_predictions=model_predictions_mocked, + property_recommendations=property_recommendations, + ashp_cop=2.8 + ) + ) + property_instance.current_energy_bill = property_current_energy_bill + +body = PlanTriggerRequest( + **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0, + 'trigger_file_path': '', 'already_installed_file_path': '', + 'patches_file_path': None, 'non_invasive_recommendations_file_path': None, + 'valuation_file_path': '', + 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None, + 'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8, + 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None, + 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} +) + +for p in tqdm(input_properties): + if not recommendations.get(p.id): + continue + + # we need to double unlist because we have a list of lists + property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} + property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] + measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] + + # If a measure requiring 
ventilation is selected, and the property does not have ventilation, we enfore + # its inclusion + needs_ventilation = any( + x in property_measure_types for x in assumptions.measures_needing_ventilation + ) and not p.has_ventilation + + if not measures_to_optimise: + # Nothing to do, we just reshape the recommendations + recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( + p.id, recommendations, set() + ) + continue + + fixed_gain = optimiser_functions.calculate_fixed_gain( + property_required_measures, recommendations, p, needs_ventilation + ) + gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) + + funding = Funding( + tenure="Social", + project_scores_matrix=project_scores_matrix, + partial_project_scores_matrix=partial_project_scores_matrix, + whlg_eligible_postcodes=whlg_eligible_postcodes, + eco4_social_cavity_abs_rate=12.5, + eco4_social_solid_abs_rate=17, + eco4_private_cavity_abs_rate=12.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=21, + gbis_private_solid_abs_rate=28, + ) + + li_thickness = convert_thickness_to_numeric( + p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] + ) + current_wall_u_value = p.walls["thermal_transmittance"] + if current_wall_u_value is None: + current_wall_u_value = get_wall_u_value( + clean_description=p.walls["clean_description"], + age_band=p.age_band, + is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], + is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], + ) + + # We insert the innovation uplift + measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) + + # TODO: Turn this into a function and store the innovaiton uplift + for group in measures_to_optimise_with_uplift: + for r in group: + + if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", + "extension_cavity_wall_insulation", 
"draught_proofing", "sealing_open_fireplace"]: + ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) + continue + + ( + r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"] + ) = funding.get_innovation_uplift( + measure=r, + starting_sap=p.data["current-energy-efficiency"], + floor_area=p.floor_area, + is_cavity=p.walls["is_cavity_wall"], + current_wall_uvalue=current_wall_u_value, + is_partial="partial" in p.walls["clean_description"].lower(), + existing_li_thickness=li_thickness, + mainheating=p.main_heating, + main_fuel=p.main_fuel, + mainheat_energy_eff=p.data["mainheat-energy-eff"], + ) + + input_measures = optimiser_functions.prepare_input_measures( + measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True + ) + + # When the goal is Increasing EPC, we can run the funding optimiser + if body.goal == "Increasing EPC": + + solutions = optimise_with_funding_paths( + p=p, + input_measures=input_measures, + housing_type=body.housing_type, + budget=body.budget, + target_gain=gain, + funding=funding + ) + + # Given the solutions we select the optimal one + solutions["cost_less_full_project_funding"] = np.where( + solutions["scheme"] == "eco4", + solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], + solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] + ) + + solutions["cost_less_full_project_funding"] = ( + solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] + ) + solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) + + if solutions["meets_upgrade_target"].any(): + # If we have a solution that meets the upgrade target, we select that one + optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] + else: + # Pick the cheapest + optimal_solution = 
solutions.iloc[0] + + # This is the list of measures that we will recommend + scheme = optimal_solution["scheme"] + funded_measures = optimal_solution["items"] if scheme != "none" else [] + solution = optimal_solution["items"] + optimal_solution["unfunded_items"] + # This is the total amount of funding that the project will produce (including uplifts) (£) + project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ + optimal_solution["partial_project_funding"] + # This is the total amount of funding associated to the uplift (£) + total_uplift = optimal_solution["total_uplift"] + # This is the funding scheme selected + # This is the full project ABS + full_project_score = optimal_solution["project_score"] + # This is the partial project ABS + partial_project_score = optimal_solution["partial_project_score"] + # This is the uplift score ABS + uplift_project_score = optimal_solution["total_uplift_score"] + else: + # We optimise and then we determine eligibility for funding, based on the measures selected + optimiser = ( + GainOptimiser( + input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False + ) if body.budget else CostOptimiser(input_measures, min_gain=gain) + ) + optimiser.setup() + optimiser.solve() + solution = optimiser.solution + + recommendation_types = [] + for measures in input_measures: + for measure in measures: + recommendation_types.append(measure["type"]) + recommendation_types = set(recommendation_types) + + has_wall_insulation_recommendation = any( + (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in + WALL_INSULATION_MEASURES + ) + has_roof_insulation_recommendation = any( + (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in + ROOF_INSULATION_MEASURES + ) + + funding.check_funding( + measures=solution, + starting_sap=p.data["current-energy-efficiency"], + ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), + 
floor_area=p.floor_area, + mainheat_description=p.main_heating["clean_description"], + heating_control_description=p.main_heating_controls["clean_description"], + is_cavity=p.walls["is_cavity_wall"], + current_wall_uvalue=current_wall_u_value, + is_partial="partial" in p.walls["clean_description"].lower(), + existing_li_thickness=li_thickness, + mainheating=p.main_heating, + main_fuel=p.main_fuel, + mainheat_energy_eff=p.data["mainheat-energy-eff"], + has_wall_insulation_recommendation=has_wall_insulation_recommendation, + has_roof_insulation_recommendation=has_roof_insulation_recommendation, + ) + + # Determine the scheme + scheme = "none" + if funding.eco4_eligible: + scheme = "eco4" + if scheme == "none" and funding.gbis_eligible: + scheme = "gbis" + + funded_measures = solution if scheme in ["gbis", "eco4"] else [] + project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs + total_uplift = funding.eco4_uplift + full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs + partial_project_score = funding.partial_project_abs + uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift + + selected = {r["id"] for r in solution} + + if property_required_measures: + solution = optimiser_functions.add_required_measures( + property_id=p.id, property_required_measures=property_required_measures, + recommendations=recommendations, selected=selected, + ) + + # Add best practice measures (ventilation/trickle vents) + selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) + # Final flattening - Don't do this! 
+ # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( + # p.id, recommendations, selected + # ) + + # TODO: functionise + for measure in funded_measures: + if "+mechanical_ventilation" in measure["type"]: + measure["type"] = measure["type"].split("+mechanical_ventilation")[0] + + p.insert_funding( + scheme=scheme, + funded_measures=funded_measures, + project_funding=project_funding, + total_uplift=total_uplift, + full_project_score=full_project_score, + partial_project_score=partial_project_score, + uplift_project_score=uplift_project_score + ) From 5ab4d5a6d850c407f6678fbd83937cef8266b6f4 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 13:36:09 +0000 Subject: [PATCH 046/202] add my code to main --- .devcontainer/Dockerfile | 23 ++++--- .devcontainer/devcontainer.json | 3 +- .devcontainer/post-install.sh | 37 ++++------ .vscode/settings.json | 19 +++++ backend/app/config.py | 10 ++- backend/app/db/connection.py | 6 ++ backend/app/db/functions/tasks/Tasks.py | 10 +++ backend/app/db/functions/whlg_functions.py | 80 ++++++++++++++++++++++ backend/app/db/models/tasks.py | 37 ++++++++++ backend/app/db/models/whlg.py | 15 ++++ backend/app/local/router.py | 5 ++ backend/app/main.py | 73 +++++++++++++++++++- backend/app/requirements/requirements.txt | 5 ++ backend/app/whlg/route.py | 47 ------------- backend/app/whlg/router.py | 78 +++++++++++++++++++++ backend/app/whlg/schema.py | 4 ++ backend/run_curl.sh | 11 +++ backend/run_local.sh | 6 ++ 18 files changed, 382 insertions(+), 87 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 backend/app/db/functions/tasks/Tasks.py create mode 100644 backend/app/db/functions/whlg_functions.py create mode 100644 backend/app/db/models/tasks.py create mode 100644 backend/app/db/models/whlg.py delete mode 100644 backend/app/whlg/route.py create mode 100644 backend/app/whlg/router.py create mode 100644 backend/run_curl.sh create mode 100644 backend/run_local.sh diff 
--git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 4d898973..c061c9f8 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -9,15 +9,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential pkg-config automake autoconf libtool \ && rm -rf /var/lib/apt/lists/* -# 2) Build and install libpostal from source -RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ - && cd /tmp/libpostal \ - && ./bootstrap.sh \ - && ./configure --datadir=/usr/local/share/libpostal \ - && make -j"$(nproc)" \ - && make install \ - && ldconfig \ - && rm -rf /tmp/libpostal +# # 2) Build and install libpostal from source +# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ +# && cd /tmp/libpostal \ +# && ./bootstrap.sh \ +# && ./configure --datadir=/usr/local/share/libpostal \ +# && make -j"$(nproc)" \ +# && make install \ +# && ldconfig \ +# && rm -rf /tmp/libpostal # 3) Create the user and grant sudo privileges RUN useradd -m -s /usr/bin/bash ${USER} \ @@ -26,7 +26,10 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ # 4) Python deps ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -ADD asset_list/requirements.txt requirements.txt +# Model +# ADD asset_list/requirements.txt requirements.txt +# FASTAPI backend +ADD backend/app/requirements/requirements.txt requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 49bd6f83..91a76c3d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -23,7 +23,8 @@ "4ops.terraform", "fabiospampinato.vscode-todo-plus", "jgclark.vscode-todo-highlight", - "corentinartaud.pdfpreview" + "corentinartaud.pdfpreview", + "ms-python.vscode-python-envs" ] } } diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index d9fc3a9e..dc6da006 100644 --- a/.devcontainer/post-install.sh +++ 
b/.devcontainer/post-install.sh @@ -1,27 +1,14 @@ -# #!/bin/bash -# poetry install; +mkdir -p ~/.ipython/profile_default/startup -# # Get the Poetry virtual environment path -# VENV_PATH=$(poetry env info --path 2>/dev/null) +cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py +from dotenv import load_dotenv +import os -# if [ -z "$VENV_PATH" ]; then -# echo "No Poetry environment found. Did you run 'poetry install'?" -# exit 1 -# fi - -# # Ensure VS Code settings directory exists -# SETTINGS_DIR="/home/vscode/.vscode-server/data/Machine" -# SETTINGS_FILE="$SETTINGS_DIR/settings.json" - -# mkdir -p "$SETTINGS_DIR" - -# # If settings.json doesn't exist, create a default one -# if [ ! -f "$SETTINGS_FILE" ]; then -# echo "{}" > "$SETTINGS_FILE" -# fi - -# # Update VS Code settings to use the Poetry virtual environment -# jq --arg venv "$VENV_PATH/bin/python" '.["python.defaultInterpreterPath"] = $venv' \ -# "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE" - -# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH" +# Adjust path as needed +env_path = "/workspaces/model/backend/.env" +if os.path.exists(env_path): + load_dotenv(env_path) + print("✔ Loaded .env into Jupyter kernel") +else: + print("⚠ No .env file found to load") +EOF \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..27782c10 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,19 @@ +{ + "jupyter.interactiveWindow.textEditor.executeSelection": true, + "python.REPL.sendToNativeREPL": true, + "notebook.output.scrolling": true, + "terminal.integrated.defaultProfile.linux": "bash", + "editor.rulers": [67], + "terminal.integrated.profiles.linux": { + "bash": { + "path": "/bin/bash" + } + }, + + // Hot reload setting that needs to be in user settings + // "jupyter.runStartupCommands": [ + // "%load_ext autoreload", "%autoreload 2" + // ] + + +} \ No newline at end of file diff --git 
a/backend/app/config.py b/backend/app/config.py index b53d5223..98e1c447 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,5 +1,7 @@ from functools import lru_cache from pydantic_settings import BaseSettings +from typing import Optional + class Settings(BaseSettings): @@ -35,9 +37,13 @@ class Settings(BaseSettings): # Other S3 buckts ENERGY_ASSESSMENTS_BUCKET: str - class Config: - env_file = "backend/.env" + # Optional AWS creds (only required in local) + AWS_ACCESS_KEY_ID: Optional[str] = None + AWS_SECRET_KEY_ID: Optional[str] = None + AWS_DEFAULT_REGION: Optional[str] = None + class Config: + env_file = "backend.env" @lru_cache() def get_settings(): diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index 9efdfd25..fbec9102 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -1,5 +1,6 @@ from sqlalchemy import create_engine from backend.app.config import get_settings +from sqlmodel import Session connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}" db_string = connection_string.format( @@ -12,3 +13,8 @@ db_string = connection_string.format( ) db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + +def get_db_session(): + if db_engine is None: + raise RuntimeError("Database is not configured. 
Set DATABASE_URL in environment variables.") + return Session(db_engine) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py new file mode 100644 index 00000000..d3f06d33 --- /dev/null +++ b/backend/app/db/functions/tasks/Tasks.py @@ -0,0 +1,10 @@ + +class TasksInterface: + def __init__(self): + pass + + + +class SubTaskInterface: + def __init__(self) + pass \ No newline at end of file diff --git a/backend/app/db/functions/whlg_functions.py b/backend/app/db/functions/whlg_functions.py new file mode 100644 index 00000000..e318d004 --- /dev/null +++ b/backend/app/db/functions/whlg_functions.py @@ -0,0 +1,80 @@ +from backend.app.db.connection import get_db_session +from backend.app.db.models.whlg import Whlg + + +def upsert_whlg_postcode(postcode: str): + """ + Manually upsert a postcode into the WHLG table. + No unique constraint is required. + """ + + cleaned = postcode.lower().replace(" ", "") + + with get_db_session() as session: + # Check if record exists + existing = session.query(Whlg).filter(Whlg.postcode == cleaned).first() + + if existing: + return existing # nothing to update, just return it + + # Insert a new row + record = Whlg(postcode=cleaned) + session.add(record) + session.commit() + session.refresh(record) + + return record + + +# One time script to upload 400,000 records in one go with the pay +# of pandas and one insert +from backend.app.db.connection import get_db_session +from backend.app.db.models.whlg import Whlg +from sqlalchemy import select +from sqlalchemy.orm import Session + + +def upload_whlg_from_dataframe(df): + """ + FAST bulk insert of WHLG postcodes (400k+ rows). + No unique constraint needed. + """ + + if "Postcode" not in df.columns: + raise ValueError("DataFrame must contain a 'Postcode' column") + + # 1. 
Clean incoming postcodes + cleaned_postcodes = ( + df["Postcode"] + .astype(str) + .str.lower() + .str.replace(" ", "", regex=False) + .dropna() + .unique() + .tolist() + ) + + with get_db_session() as session: + # 2. Fetch existing postcodes once (VERY FAST) + existing = session.exec(select(Whlg.postcode)).all() + existing_set = set(existing) + + # 3. Determine which are new + new_postcodes = [ + pc for pc in cleaned_postcodes if pc not in existing_set + ] + + if not new_postcodes: + return {"inserted": 0, "skipped_existing": len(cleaned_postcodes)} + + # 4. Bulk insert new postcodes in one shot + objects = [Whlg(postcode=pc) for pc in new_postcodes] + + session.bulk_save_objects(objects) + session.commit() + + return { + "inserted": len(new_postcodes), + "skipped_existing": len(cleaned_postcodes) - len(new_postcodes), + "total_provided": len(cleaned_postcodes) + } diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py new file mode 100644 index 00000000..ed5b3710 --- /dev/null +++ b/backend/app/db/models/tasks.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import Optional, List +from datetime import datetime +from uuid import UUID, uuid4 + +from sqlmodel import SQLModel, Field, Relationship + + +class Task(SQLModel, table=True): + __tablename__ = "tasks" + + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + index=True, + ) + + taskSource: str = Field(alias="task_source") + + jobStarted: Optional[datetime] = Field( + default=None, alias="job_started" + ) + jobCompleted: Optional[datetime] = Field( + default=None, alias="job_completed" + ) + + status: str = Field(default="In Progress") + service: Optional[str] = None + + updatedAt: datetime = Field( + default_factory=datetime.utcnow, + alias="updated_at", + ) + + # Relationship + subTasks: List["SubTask"] = Relationship(back_populates="task") diff --git a/backend/app/db/models/whlg.py b/backend/app/db/models/whlg.py new file mode 100644 index 
00000000..29d907e4 --- /dev/null +++ b/backend/app/db/models/whlg.py @@ -0,0 +1,15 @@ +import uuid +from typing import Optional +from sqlmodel import SQLModel, Field + + +class Whlg(SQLModel, table=True): + __tablename__ = "whlg" + + id: Optional[int] = Field( + default=None, + primary_key=True, + index=True, + ) + + postcode: str = Field(nullable=False) \ No newline at end of file diff --git a/backend/app/local/router.py b/backend/app/local/router.py index 4ebb490c..0977be04 100644 --- a/backend/app/local/router.py +++ b/backend/app/local/router.py @@ -31,6 +31,11 @@ def create_dummy_token(secret: str) -> str: return token +@router.get("/") +async def dummy_token(): + return {"hello": "world"} + + @router.get("/dummy-token") async def dummy_token(): settings = get_settings() diff --git a/backend/app/main.py b/backend/app/main.py index de6f0795..261e2f34 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -6,6 +6,7 @@ from fastapi.encoders import jsonable_encoder from starlette.exceptions import HTTPException as StarletteHTTPException from mangum import Mangum from backend.app.portfolio import router as portfolio_router +from backend.app.whlg import router as whlg_router from backend.app.plan import router as plan_router from backend.app.dependencies import validate_api_key from backend.app.config import get_settings @@ -13,7 +14,10 @@ from backend.app.config import get_settings logger = logging.getLogger("uvicorn.error") logging.basicConfig(level=logging.INFO) -app = FastAPI(dependencies=[Depends(validate_api_key)]) +if get_settings().ENVIRONMENT == "local": + app = FastAPI() +else: + app = FastAPI(dependencies=[Depends(validate_api_key)]) # Handle 422 errors (validation failures) @@ -52,10 +56,75 @@ async def log_requests(request: Request, call_next): app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") +app.include_router(whlg_router.router, prefix="/v1") + +if get_settings().ENVIRONMENT == 
"local": + from app.local import router as local_router + app.include_router(local_router.router) + +handler = Mangum(app) +import logging +from fastapi.responses import JSONResponse +from fastapi import FastAPI, Depends, Request, status +from fastapi.exceptions import RequestValidationError +from fastapi.encoders import jsonable_encoder +from starlette.exceptions import HTTPException as StarletteHTTPException +from mangum import Mangum +from backend.app.portfolio import router as portfolio_router +from backend.app.whlg import router as whlg_router +from backend.app.plan import router as plan_router +from backend.app.dependencies import validate_api_key +from backend.app.config import get_settings + +logger = logging.getLogger("uvicorn.error") +logging.basicConfig(level=logging.INFO) + +if get_settings().ENVIRONMENT == "local": + app = FastAPI() +else: + app = FastAPI(dependencies=[Depends(validate_api_key)]) + + +# Handle 422 errors (validation failures) +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, exc: RequestValidationError): + logger.error(f"422 Validation Error at {request.url}") + logger.error(f"Body: {exc.body}") + logger.error(f"Validation Errors: {exc.errors()}") + return JSONResponse( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + content=jsonable_encoder({ + "detail": exc.errors(), + "body": exc.body + }), + ) + + +# Handle generic HTTP exceptions (optional, useful for catching 404, 403, etc.) 
+@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + logger.warning(f"{exc.status_code} Error at {request.url} - Detail: {exc.detail}") + return JSONResponse( + status_code=exc.status_code, + content={"detail": exc.detail}, + ) + + +# Middleware to log requests +@app.middleware("http") +async def log_requests(request: Request, call_next): + logger.info(f"Incoming request: {request.method} {request.url}") + response = await call_next(request) + logger.info(f"Response status: {response.status_code}") + return response + + +app.include_router(portfolio_router.router, prefix="/v1") +app.include_router(plan_router.router, prefix="/v1") +app.include_router(whlg_router.router, prefix="/v1") if get_settings().ENVIRONMENT == "local": from app.local import router as local_router - app.include_router(local_router.router) handler = Mangum(app) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index a213214d..8a151e83 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -12,3 +12,8 @@ boto3==1.35.44 openpyxl==3.1.2 # Basic pytz +uvicorn[standard] +pandas +ipykernel +sqlmodel + diff --git a/backend/app/whlg/route.py b/backend/app/whlg/route.py deleted file mode 100644 index 21d417c5..00000000 --- a/backend/app/whlg/route.py +++ /dev/null @@ -1,47 +0,0 @@ -import boto3 -import json -import math -import asyncio -import random - -from datetime import datetime - -from fastapi import APIRouter, Depends -from backend.app.dependencies import validate_token -from backend.app.plan.schemas import PlanTriggerRequest -from backend.app.config import get_settings -from sqlalchemy.orm import sessionmaker -from utils.logger import setup_logger -from backend.app.db.connection import db_engine - -from backend.app.db.functions.recommendations_functions import create_scenario - -logger = setup_logger() - -router = 
APIRouter( - prefix="/whlg", - tags=["whlg"], - dependencies=[Depends(validate_token)], - responses={404: {"description": "Not found"}} -) - - -@router.post("/") -async def whlg_entrypoint(body): - # body needs to include postcode, UPRN [task ID?] - # - # Refer to the plan trigger route for code - # 1) Create an event schema and store it in the schemas file - # 2) Build the tasks functions - # 3) Read in the funding csx. This can be found as such: - # whlg_eligible_postcodes = read_csv_from_s3( - # bucket_name=get_settings().DATA_BUCKET, - # filepath="funding/whlg eligible postcodes.csv", - # ) - # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) - # Check the postcode against this file - # We need to store this somewhere????!!!??!??!?!?!?!??!??!??!??!??!??!??!??!??!??! Create a new table! - # Update subtask to be complete - # Once this is complete, build the logs stuff, add the cloudwatch logs ID to the database - - print("We're gonna do stuff!") diff --git a/backend/app/whlg/router.py b/backend/app/whlg/router.py new file mode 100644 index 00000000..3957a3f4 --- /dev/null +++ b/backend/app/whlg/router.py @@ -0,0 +1,78 @@ +import boto3 +import json +import math +import asyncio +import random + +from datetime import datetime + +from fastapi import APIRouter, Depends +from backend.app.dependencies import validate_token +from backend.app.plan.schemas import PlanTriggerRequest +from backend.app.config import get_settings +from sqlalchemy.orm import sessionmaker +from utils.logger import setup_logger +from backend.app.db.connection import db_engine +from backend.app.db.functions.recommendations_functions import create_scenario +import pandas as pd +from backend.app.whlg.schema import WHLGElligibilityRequest + +from utils.s3 import read_csv_from_s3 +from sqlalchemy.dialects.postgresql import insert +from backend.app.db.connection import get_db_session +from backend.app.db.models.whlg import Whlg +from backend.app.db.functions.whlg_functions import 
upsert_whlg_postcode + +logger = setup_logger() + + +if get_settings().ENVIRONMENT == "local": + router = APIRouter( + prefix="/whlg", + tags=["whlg"], + ) + +else: + router = APIRouter( + prefix="/whlg", + tags=["whlg"], + dependencies=[Depends(validate_token)], + responses={404: {"description": "Not found"}} + ) + +@router.get("/") +async def whlg_entrypoint(): + # body needs to include postcode, UPRN [task ID?] + # + # Refer to the plan trigger route for code + # 1) Create an event schema and store it in the schemas file + # 2) Build the tasks functions + # 3) Read in the funding csx. This can be found as such: + # whlg_eligible_postcodes = read_csv_from_s3( + # bucket_name=get_settings().DATA_BUCKET, + # filepath="funding/whlg eligible postcodes.csv", + # ) + # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + # Check the postcode against this file + # We need to store this somewhere????!!!??!??!?!?!?!??!??!??!??!??!??!??!??!??!??! Create a new table! + # Update subtask to be complete + # Once this is complete, build the logs stuff, add the cloudwatch logs ID to the database + return {"hello": "from whlg"} + + +@router.post("/eligible") +async def eligiable(body: WHLGElligibilityRequest): + postcode = body.postcode or "" + postcode = postcode.lower().replace(" ", "") + + whlg_eligible_postcodes = read_csv_from_s3( + bucket_name=get_settings().DATA_BUCKET, + filepath="funding/whlg eligible postcodes.csv", + ) + whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + whlg_eligible_postcodes['Postcode'] = whlg_eligible_postcodes['Postcode'].str.replace(' ', '', regex=False) + + is_eligible = postcode in whlg_eligible_postcodes['Postcode'].values + return {"whlg_eligible": is_eligible} + + diff --git a/backend/app/whlg/schema.py b/backend/app/whlg/schema.py index e69de29b..648ecbf3 100644 --- a/backend/app/whlg/schema.py +++ b/backend/app/whlg/schema.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel, Field + +class 
WHLGElligibilityRequest(BaseModel): + postcode: str = Field(..., example="B93 8SY") \ No newline at end of file diff --git a/backend/run_curl.sh b/backend/run_curl.sh new file mode 100644 index 00000000..22433e39 --- /dev/null +++ b/backend/run_curl.sh @@ -0,0 +1,11 @@ +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "B93 8SY"}' + +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "BN15 0FD"}' + +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "DY6 0LB"}' diff --git a/backend/run_local.sh b/backend/run_local.sh new file mode 100644 index 00000000..be45a54a --- /dev/null +++ b/backend/run_local.sh @@ -0,0 +1,6 @@ +set -a +source ./.env +set +a + +uvicorn app.main:app --reload + From 05740f82a4026dc7f5de23519576dea1498b2dec Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 13:40:29 +0000 Subject: [PATCH 047/202] uploade without workflows --- .../actions/lambda-deploy/action.yml | 86 ------------------- .../actions/terraform-deploy/action.yml | 55 ------------ .github/workflows/lambda_main.yml | 33 ------- 3 files changed, 174 deletions(-) delete mode 100644 .github/workflows/actions/lambda-deploy/action.yml delete mode 100644 .github/workflows/actions/terraform-deploy/action.yml delete mode 100644 .github/workflows/lambda_main.yml diff --git a/.github/workflows/actions/lambda-deploy/action.yml b/.github/workflows/actions/lambda-deploy/action.yml deleted file mode 100644 index 3ca0fc8d..00000000 --- a/.github/workflows/actions/lambda-deploy/action.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: "Build and Push Lambda Image to ECR" -description: "Reusable action for building and pushing lambda Docker image to ECR" - -inputs: - ecr_name: - description: "Lambda name / ECR repo name" - required: true - dockerfile_path: - description: "Path to Dockerfile" - required: 
true - ecr_tf_dir: - description: "Path to ECR terraform directory" - required: true - lambda_tf_dir: - description: "Path to Lambda terraform directory" - required: true - aws-access-key-id: - description: "AWS access key" - required: true - aws-secret-access-key: - description: "AWS secret key" - required: true - aws-region: - description: "AWS region" - required: true - git-sha: - description: "Git commit SHA" - required: true - git-ref: - description: "Git ref name" - required: true - -runs: - using: "composite" - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - - - name: Log in to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Deploy ECR - uses: ./.github/workflows/actions/terraform-deploy - with: - working_directory: ${{ inputs.ecr_tf_dir }} - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - - name: Set Docker image tag - id: set_tag - shell: bash - run: | - SHORT_SHA=$(echo "${{ inputs.git-sha }}" | cut -c1-7) - BRANCH=$(echo "${{ inputs.git-ref }}" | tr '/' '-') - TAG="${BRANCH}-${SHORT_SHA}" - echo "IMAGE_TAG=${TAG}" >> $GITHUB_ENV - echo "tag=$TAG" >> $GITHUB_OUTPUT - - - name: Build and push Docker image - shell: bash - run: | - IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${{ steps.set_tag.outputs.tag }} - echo "Building Docker image for ${{ inputs.ecr_name }}..." - docker build -t $IMAGE_URI -f ${{ inputs.dockerfile_path }} . - - echo "Pushing to ECR..." 
- docker push $IMAGE_URI - - - name: Deploy Lambda - uses: ./.github/workflows/actions/terraform-deploy - with: - working_directory: ${{ inputs.lambda_tf_dir }} - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - lambda-image-tag: ${{ steps.set_tag.outputs.tag }} - - - diff --git a/.github/workflows/actions/terraform-deploy/action.yml b/.github/workflows/actions/terraform-deploy/action.yml deleted file mode 100644 index 56133299..00000000 --- a/.github/workflows/actions/terraform-deploy/action.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: "Terraform Plan Shared Config" -description: "Plans shared Terraform config for Lambdas" - -inputs: - working_directory: - description: "Directory containing Terraform config" - required: true - aws-access-key-id: - description: "AWS access key" - required: true - aws-secret-access-key: - description: "AWS secret key" - required: true - aws-region: - description: "AWS region" - required: true - lambda-image-tag: - description: "Tag of the Lambda image (e.g., GitHub SHA)" - required: false - -runs: - using: "composite" - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - - - name: Setup Terraform - uses: hashicorp/setup-terraform@v3 - - - name: Terraform Init - working-directory: ${{ inputs.working_directory }} - shell: bash - run: terraform init -reconfigure - - - name: Terraform Plan - working-directory: ${{ inputs.working_directory }} - shell: bash - run: | - if [ -n "${{ inputs.lambda-image-tag }}" ]; then - terraform plan -out=tfplan -var="lambda_image_tag=${{ inputs.lambda-image-tag }}" - else - terraform plan -out=tfplan - fi - - - name: Terraform Apply - working-directory: ${{ 
inputs.working_directory }} - shell: bash - run: terraform apply -auto-approve tfplan - diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml deleted file mode 100644 index 960adbe5..00000000 --- a/.github/workflows/lambda_main.yml +++ /dev/null @@ -1,33 +0,0 @@ -# Please note, this github work flows assumes that shared-terrform is deployed in aws env -# The shared-terraform files lives in https://github.com/Hestia-Homes/survey-extraction/tree/main/deployment/lambda/lambda_shared - -name: Deploy Lambdas -on: - push: - branches: [main, feature/whlg_lambda] - -env: - AWS_REGION: eu-west-2 - -jobs: - whlg-calc: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - name: Build and deploy Warm Homes Local Grant Calc (whlg-calc) - uses: ./.github/workflows/actions/lambda-deploy - with: - ecr_name: whlg_calc_adhoc_ecr - dockerfile_path: ./deployment/lambda/whlg_calculator/docker/Dockerfile - ecr_tf_dir: ./deployment/lambda/whlg_calculator/docker/ - lambda_tf_dir: ./deployment/lambda/whlg_calculator/ - aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY}} - aws-region: eu-west-2 - git-sha: ${{ github.sha }} - git-ref: ${{ github.ref_name }} \ No newline at end of file From c617d603a360477c36a9a463c0c262d2de800b48 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 13:41:24 +0000 Subject: [PATCH 048/202] uploade without workflows --- .../whlg_calculator/docker/.dockerignore | 21 ----- .../lambda/whlg_calculator/docker/Dockerfile | 25 ------ .../lambda/whlg_calculator/docker/app.py | 3 - .../lambda/whlg_calculator/docker/ecr.tf | 63 -------------- .../lambda/whlg_calculator/docker/main.tf | 0 .../lambda/whlg_calculator/docker/provider.tf | 15 ---- deployment/lambda/whlg_calculator/main.tf | 0 deployment/lambda/whlg_calculator/provider.tf | 15 ---- 
deployment/lambda/whlg_calculator/vars.tf | 5 -- .../lambda/whlg_calculator/whlg_lambda.tf | 83 ------------------- 10 files changed, 230 deletions(-) delete mode 100644 deployment/lambda/whlg_calculator/docker/.dockerignore delete mode 100644 deployment/lambda/whlg_calculator/docker/Dockerfile delete mode 100644 deployment/lambda/whlg_calculator/docker/app.py delete mode 100644 deployment/lambda/whlg_calculator/docker/ecr.tf delete mode 100644 deployment/lambda/whlg_calculator/docker/main.tf delete mode 100644 deployment/lambda/whlg_calculator/docker/provider.tf delete mode 100644 deployment/lambda/whlg_calculator/main.tf delete mode 100644 deployment/lambda/whlg_calculator/provider.tf delete mode 100644 deployment/lambda/whlg_calculator/vars.tf delete mode 100644 deployment/lambda/whlg_calculator/whlg_lambda.tf diff --git a/deployment/lambda/whlg_calculator/docker/.dockerignore b/deployment/lambda/whlg_calculator/docker/.dockerignore deleted file mode 100644 index d587d341..00000000 --- a/deployment/lambda/whlg_calculator/docker/.dockerignore +++ /dev/null @@ -1,21 +0,0 @@ -# Ignore junk and large files -*.pdf -*.csv -*.xml -*.parquet -*.ipynb -*.mp4 -*.mov -*.jpg -*.png -*.zip -*.tar.gz -__pycache__/ -*.pyc -*.pyo -*.pyd -build/ -dist/ -.etl_cache/ -tests/ -docs/ diff --git a/deployment/lambda/whlg_calculator/docker/Dockerfile b/deployment/lambda/whlg_calculator/docker/Dockerfile deleted file mode 100644 index cdd1f8a3..00000000 --- a/deployment/lambda/whlg_calculator/docker/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.12 - -# Install Poetry (you could pin a version if you like) -RUN curl -sSL https://install.python-poetry.org | python3 - - -# Add Poetry to PATH -ENV PATH="/root/.local/bin:$PATH" - -# Set working directory -WORKDIR /var/task - -# Copy Poetry files first to leverage Docker layer caching -COPY pyproject.toml poetry.lock README.md ./ -COPY etl/ etl/ - - -# Install dependencies into /var/task -RUN poetry config 
virtualenvs.create false \ - && poetry install --only main --no-interaction --no-ansi - -# Copy app code -COPY deployment/lambda/extractor_and_loader/docker/app.py ./ - -# Set Lambda handler -CMD ["app.handler"] \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/app.py b/deployment/lambda/whlg_calculator/docker/app.py deleted file mode 100644 index 4dcf1a8e..00000000 --- a/deployment/lambda/whlg_calculator/docker/app.py +++ /dev/null @@ -1,3 +0,0 @@ -def handler(event, context): - print("Hello and welcome to the WHLG Calculator") - print("Please contact the tech team for implementation") \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/ecr.tf b/deployment/lambda/whlg_calculator/docker/ecr.tf deleted file mode 100644 index a1501dff..00000000 --- a/deployment/lambda/whlg_calculator/docker/ecr.tf +++ /dev/null @@ -1,63 +0,0 @@ -# ECR repo -resource "aws_ecr_repository" "whlg_calc_adhoc_ecr" { - name = "whlg_calc_adhoc_ecr" -} - -# ECR policy to allow Lambda access -resource "aws_ecr_repository_policy" "whlg_calc_adhoc_ecr_access" { - repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name - - policy = jsonencode({ - Version = "2008-10-17", - Statement = [{ - Sid = "AllowLambdaPull", - Effect = "Allow", - Principal = { - Service = "lambda.amazonaws.com" - }, - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ] - }] - }) -} - - - -# ECR lifecycle policy to delete tagged images older than 14 days -resource "aws_ecr_lifecycle_policy" "whlg_calc_adhoc_loader_lifecycle" { - repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name - - policy = jsonencode({ - "rules": [ - { - "rulePriority": 2, - "description": "Expire images older than 14 days", - "selection": { - "tagStatus": "untagged", - "countType": "sinceImagePushed", - "countUnit": "days", - "countNumber": 1 - }, - "action": { - "type": "expire" - } - }, - { - "rulePriority": 1, - "description": "Keep 
last 5 images", - "selection": { - "tagStatus": "tagged", - "tagPrefixList": ["feature"], - "countType": "imageCountMoreThan", - "countNumber": 5 - }, - "action": { - "type": "expire" - } - } - ] - }) -} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/main.tf b/deployment/lambda/whlg_calculator/docker/main.tf deleted file mode 100644 index e69de29b..00000000 diff --git a/deployment/lambda/whlg_calculator/docker/provider.tf b/deployment/lambda/whlg_calculator/docker/provider.tf deleted file mode 100644 index 5f0fef0f..00000000 --- a/deployment/lambda/whlg_calculator/docker/provider.tf +++ /dev/null @@ -1,15 +0,0 @@ -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 6.3.0" - } - } - backend "s3" { - bucket = "whlg-calc-tf-state" - region = "eu-west-2" - key = "env:/dev/lambda/ecr/whlg-calc.tfstate" - } - - required_version = ">= 1.2.0" -} diff --git a/deployment/lambda/whlg_calculator/main.tf b/deployment/lambda/whlg_calculator/main.tf deleted file mode 100644 index e69de29b..00000000 diff --git a/deployment/lambda/whlg_calculator/provider.tf b/deployment/lambda/whlg_calculator/provider.tf deleted file mode 100644 index df9abf1c..00000000 --- a/deployment/lambda/whlg_calculator/provider.tf +++ /dev/null @@ -1,15 +0,0 @@ -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 6.3.0" - } - } - backend "s3" { - bucket = "whlg-calc-tf-state" - region = "eu-west-2" - key = "env:/dev/lambda/eachlambda/whlg_calc_lambda.tfstate" - } - - required_version = ">= 1.2.0" -} diff --git a/deployment/lambda/whlg_calculator/vars.tf b/deployment/lambda/whlg_calculator/vars.tf deleted file mode 100644 index ecdf359d..00000000 --- a/deployment/lambda/whlg_calculator/vars.tf +++ /dev/null @@ -1,5 +0,0 @@ -variable "lambda_image_tag" { - description = "Docker image tag (e.g. 
GitHub SHA)" - type = string - default = "local-dev-latest" -} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/whlg_lambda.tf b/deployment/lambda/whlg_calculator/whlg_lambda.tf deleted file mode 100644 index 0a5433a9..00000000 --- a/deployment/lambda/whlg_calculator/whlg_lambda.tf +++ /dev/null @@ -1,83 +0,0 @@ -# Reference existing IAM role -data "aws_iam_role" "lambda_exec_role" { - name = "lambda-exec-role" -} - -# Reference existing ECR repository -data "aws_ecr_repository" "whlg_calc_adhoc_ecr" { - name = "whlg_calc_adhoc_ecr" -} - -# SQS queue -resource "aws_sqs_queue" "whlg_calc_adhoc_queue" { - name = "whlg_calc_adhoc-queue" - visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) -} - - -# Custom IAM policy specific to lambda_example -resource "aws_iam_policy" "whlg_calc_adhoc_policy" { - name = "walthamforest_adhoc_policy_lambda" - - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:GetQueueUrl", - "sqs:ChangeMessageVisibility" - ], - Resource = aws_sqs_queue.whlg_calc_adhoc_queue.arn - }, - { - Effect = "Allow", - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ], - Resource = data.aws_ecr_repository.whlg_calc_adhoc_ecr.arn - }, - { - Effect = "Allow", - Action = ["ecr:GetAuthorizationToken"], - Resource = "*" - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "whlg_calc_adhoc_policy_attach" { - role = data.aws_iam_role.lambda_exec_role.name - policy_arn = aws_iam_policy.whlg_calc_adhoc_policy.arn -} - -# Lambda function -resource "aws_lambda_function" "whlg_calc_adhoc" { - function_name = "whlg_calc_adhoc" - role = data.aws_iam_role.lambda_exec_role.arn - package_type = "Image" - image_uri = "${data.aws_ecr_repository.whlg_calc_adhoc_ecr.repository_url}:${var.lambda_image_tag}" - # Increase timeout (max 900 
sec / 15 min) - # timeout = 300 # e.g. 5 minutes - - # Increase memory (default 128 MB) - memory_size = 2048 # try 1024 or 2048 MB to start - - # environment { - # variables = { - # DATABASE_URL = "postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB" - # } - # } -} - -# SQS trigger -resource "aws_lambda_event_source_mapping" "whlg_calc_adhoc_trigger" { - event_source_arn = aws_sqs_queue.whlg_calc_adhoc_queue.arn - function_name = aws_lambda_function.whlg_calc_adhoc.arn - batch_size = 1 -} From d98ae7db7b7713ad1fc1a9d7d7d73c1f91cedfb6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 14:25:28 +0000 Subject: [PATCH 049/202] added task and sub task interface --- backend/app/db/functions/tasks/Tasks.py | 182 +++++++++++++++++++++++- backend/app/db/models/tasks.py | 36 +++++ backend/app/tasks/__init__.py | 0 backend/app/tasks/router.py | 87 +++++++++++ backend/app/tasks/schema.py | 21 +++ 5 files changed, 320 insertions(+), 6 deletions(-) create mode 100644 backend/app/tasks/__init__.py create mode 100644 backend/app/tasks/router.py create mode 100644 backend/app/tasks/schema.py diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index d3f06d33..18900c83 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -1,10 +1,180 @@ +from __future__ import annotations -class TasksInterface: - def __init__(self): - pass - +# ---- Standard Library ---- +from typing import Optional, Dict, Any +from datetime import datetime, timezone +from uuid import UUID +import json + +# ---- SQLModel / SQLAlchemy ---- +from sqlmodel import Session, select + +# ---- DB Session ---- +from backend.app.db.connection import get_db_session + +# ---- Models ---- +from backend.app.db.models.tasks import Task, SubTask +# ============================================================ +# SubTask Interface +# 
============================================================ class SubTaskInterface: - def __init__(self) - pass \ No newline at end of file + """ + CRUD operations for SubTask + cascading Task progress updates. + """ + + def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise ValueError(f"Task {task_id} not found") + + subtask = SubTask( + taskId=task_id, + inputs=json.dumps(inputs) if inputs else None, + jobStarted=now, + ) + + session.add(subtask) + session.commit() + session.refresh(subtask) + + # Recalculate the parent task status + self._update_task_progress(session, task_id) + + return subtask + + def update_subtask_status(self, subtask_id: UUID, status: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + normalized = status.lower() + + # Start time + if normalized == "in progress" and subtask.jobStarted is None: + subtask.jobStarted = now + + # Completed time + if normalized == "complete": + subtask.jobCompleted = now + + subtask.status = normalized + subtask.updatedAt = now + + session.add(subtask) + session.commit() + + # Re-evaluate the task + self._update_task_progress(session, subtask.taskId) + + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # Task Progress Calculation + # -------------------------------------------------------- + def _update_task_progress(self, session: Session, task_id: UUID): + task = session.get(Task, task_id) + if not task: + return + + subtasks = session.exec( + select(SubTask).where(SubTask.taskId == task_id) + ).all() + + if not subtasks: + return + + statuses = [st.status.lower() for st in subtasks] + now = datetime.now(timezone.utc) + + # Priority: + # failed > in 
progress > complete + if "failed" in statuses: + task.status = "failed" + task.jobCompleted = now + + elif all(s == "complete" for s in statuses): + task.status = "complete" + task.jobCompleted = now + + else: + task.status = "in progress" + if task.jobStarted is None: + task.jobStarted = now + task.jobCompleted = None # still running + + task.updatedAt = now + session.add(task) + session.commit() + + +# ============================================================ +# Task Interface +# ============================================================ +class TasksInterface: + """ + High-level operations for Task records. + """ + + def create_task( + self, + *, + task_source: str, + service: Optional[str] = None, + inputs: Optional[Dict[str, Any]] = None, + ): + now = datetime.now(timezone.utc) + + # Step 1: Create the task + with get_db_session() as session: + task = Task( + taskSource=task_source, + service=service, + jobStarted=now, + ) + + session.add(task) + session.commit() + session.refresh(task) + + # Step 2: Create first subtask using SubTaskInterface + subtask_interface = SubTaskInterface() + subtask = subtask_interface.create_subtask( + task_id=task.id, + inputs=inputs + ) + + return task.id, subtask.id + + def update_task_status(self, task_id: UUID, status: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise ValueError(f"Task {task_id} not found") + + normalized = status.lower() + + if normalized == "in progress" and task.jobStarted is None: + task.jobStarted = now + + if normalized == "complete": + task.jobCompleted = now + + task.status = normalized + task.updatedAt = now + + session.add(task) + session.commit() + session.refresh(task) + + return task diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py index ed5b3710..d8007dcd 100644 --- a/backend/app/db/models/tasks.py +++ b/backend/app/db/models/tasks.py @@ -35,3 +35,39 @@ class Task(SQLModel, 
table=True): # Relationship subTasks: List["SubTask"] = Relationship(back_populates="task") + + +class SubTask(SQLModel, table=True): + __tablename__ = "sub_task" + + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + index=True, + ) + + taskId: UUID = Field( + foreign_key="tasks.id", + alias="task_id", + ) + + jobStarted: Optional[datetime] = Field( + default=None, alias="job_started" + ) + jobCompleted: Optional[datetime] = Field( + default=None, alias="job_completed" + ) + + status: str = Field(default="In Progress") + + inputs: Optional[str] = None + outputs: Optional[str] = None + cloudLogsURL: Optional[str] = Field(alias="cloud_logs_url") + + updatedAt: datetime = Field( + default_factory=datetime.utcnow, + alias="updated_at", + ) + + # Relationship + task: Optional[Task] = Relationship(back_populates="subTasks") diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py new file mode 100644 index 00000000..d324f9ba --- /dev/null +++ b/backend/app/tasks/router.py @@ -0,0 +1,87 @@ +from fastapi import APIRouter, Depends, HTTPException +from uuid import UUID + +from backend.app.dependencies import validate_token +from backend.app.tasks.schema import ( + CreateTaskRequest, + UpdateTaskStatusRequest, + CreateSubTaskRequest, + UpdateSubTaskStatusRequest, +) + +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface +from backend.app.db.connection import get_db_session +from backend.app.db.models.tasks import Task, SubTask +from sqlmodel import select + + +router = APIRouter( + prefix="/tasks", + tags=["tasks"], + dependencies=[Depends(validate_token)], +) + + +@router.post("/", summary="Create a new task and its first subtask") +async def create_task(req: CreateTaskRequest): + tasks = TasksInterface() + task_id, subtask_id = tasks.create_task( + task_source=req.task_source, + 
service=req.service, + inputs=req.inputs, + ) + return {"task_id": task_id, "subtask_id": subtask_id} + + +@router.get("/{task_id}", summary="Get a task and its subtasks") +async def get_task(task_id: UUID): + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise HTTPException(status_code=404, detail="Task not found") + + subtasks = session.exec( + select(SubTask).where(SubTask.taskId == task_id) + ).all() + + # Deserialize JSON inputs back to dict + formatted = [] + for st in subtasks: + formatted.append({ + **st.dict(), + "inputs": json.loads(st.inputs) if st.inputs else None + }) + + return { + "task": task, + "subtasks": formatted, + } + + +@router.put("/{task_id}/status", summary="Update a task's status") +async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): + tasks = TasksInterface() + try: + updated = tasks.update_task_status(task_id, req.status) + return {"task_id": updated.id, "status": updated.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + +@router.post("/{task_id}/subtasks", summary="Create a new subtask under a task") +async def create_subtask(task_id: UUID, req: CreateSubTaskRequest): + subtasks = SubTaskInterface() + try: + st = subtasks.create_subtask(task_id, req.inputs) + return {"subtask_id": st.id, "task_id": task_id} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + +@router.put("/subtasks/{subtask_id}/status", summary="Update a subtask's status") +async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusRequest): + subtasks = SubTaskInterface() + try: + st = subtasks.update_subtask_status(subtask_id, req.status) + return {"subtask_id": st.id, "status": st.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py new file mode 100644 index 00000000..66be61e7 --- /dev/null +++ 
b/backend/app/tasks/schema.py @@ -0,0 +1,21 @@ +from typing import Optional, Any, Dict +from uuid import UUID +from pydantic import BaseModel + + +class CreateTaskRequest(BaseModel): + task_source: str + service: Optional[str] = None + inputs: Optional[Dict[str, Any]] = None # JSON object + + +class UpdateTaskStatusRequest(BaseModel): + status: str + + +class CreateSubTaskRequest(BaseModel): + inputs: Optional[Dict[str, Any]] = None # JSON object + + +class UpdateSubTaskStatusRequest(BaseModel): + status: str From 47be3ffea37ee03f820c33fcbf0a37c974638594 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 14:26:17 +0000 Subject: [PATCH 050/202] added tasks interface --- backend/app/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/app/main.py b/backend/app/main.py index 261e2f34..f0ab4d86 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -8,6 +8,7 @@ from mangum import Mangum from backend.app.portfolio import router as portfolio_router from backend.app.whlg import router as whlg_router from backend.app.plan import router as plan_router +from backend.app.tasks import router as tasks_router from backend.app.dependencies import validate_api_key from backend.app.config import get_settings @@ -57,6 +58,7 @@ async def log_requests(request: Request, call_next): app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") app.include_router(whlg_router.router, prefix="/v1") +app.include_router(tasks_router.router, prefix="/v1") if get_settings().ENVIRONMENT == "local": from app.local import router as local_router From 68a5de28e2b5b34d82e12eb3e1c869c00db4594b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 15:02:33 +0000 Subject: [PATCH 051/202] added tasks so Khalim can reeview it --- backend/app/db/functions/tasks/Tasks.py | 155 ++++++++++++++++++++---- backend/app/tasks/router.py | 54 ++++++++- backend/app/tasks/schema.py | 5 + 3 files changed, 189 insertions(+), 
25 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 18900c83..06e1c6fe 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -24,6 +24,9 @@ class SubTaskInterface: CRUD operations for SubTask + cascading Task progress updates. """ + # -------------------------------------------------------- + # CREATE SUBTASK + # -------------------------------------------------------- def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): now = datetime.now(timezone.utc) @@ -35,18 +38,22 @@ class SubTaskInterface: subtask = SubTask( taskId=task_id, inputs=json.dumps(inputs) if inputs else None, - jobStarted=now, + status="waiting", + jobStarted=None, + jobCompleted=None, ) session.add(subtask) session.commit() session.refresh(subtask) - # Recalculate the parent task status + # Recalculate parent task progress self._update_task_progress(session, task_id) - return subtask + # -------------------------------------------------------- + # UPDATE STATUS (in progress, complete, failed) + # -------------------------------------------------------- def update_subtask_status(self, subtask_id: UUID, status: str): now = datetime.now(timezone.utc) @@ -57,12 +64,12 @@ class SubTaskInterface: normalized = status.lower() - # Start time + # When job really starts if normalized == "in progress" and subtask.jobStarted is None: subtask.jobStarted = now - # Completed time - if normalized == "complete": + # Completed or failed + if normalized in ("complete", "failed"): subtask.jobCompleted = now subtask.status = normalized @@ -71,14 +78,80 @@ class SubTaskInterface: session.add(subtask) session.commit() - # Re-evaluate the task + # Recalculate task status self._update_task_progress(session, subtask.taskId) session.refresh(subtask) return subtask # -------------------------------------------------------- - # Task Progress Calculation + # UPDATE OUTPUTS + # 
-------------------------------------------------------- + def update_subtask_output(self, subtask_id: UUID, outputs: Dict[str, Any]): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + subtask.outputs = json.dumps(outputs) + subtask.updatedAt = now + + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # UPDATE CLOUD LOGS URL + # -------------------------------------------------------- + def update_subtask_logs(self, subtask_id: UUID, cloud_logs_url: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + subtask.cloudLogsURL = cloud_logs_url + subtask.updatedAt = now + + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # SET BOTH OUTPUT + LOGS + # -------------------------------------------------------- + def set_subtask_result( + self, + subtask_id: UUID, + outputs: Optional[Dict[str, Any]] = None, + cloud_logs_url: Optional[str] = None, + ): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + if outputs is not None: + subtask.outputs = json.dumps(outputs) + + if cloud_logs_url is not None: + subtask.cloudLogsURL = cloud_logs_url + + subtask.updatedAt = now + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # TASK PROGRESS CALCULATION # -------------------------------------------------------- def _update_task_progress(self, session: Session, 
task_id: UUID): task = session.get(Task, task_id) @@ -89,14 +162,9 @@ class SubTaskInterface: select(SubTask).where(SubTask.taskId == task_id) ).all() - if not subtasks: - return - - statuses = [st.status.lower() for st in subtasks] + statuses = [s.status.lower() for s in subtasks] now = datetime.now(timezone.utc) - # Priority: - # failed > in progress > complete if "failed" in statuses: task.status = "failed" task.jobCompleted = now @@ -105,16 +173,61 @@ class SubTaskInterface: task.status = "complete" task.jobCompleted = now - else: + elif "in progress" in statuses: task.status = "in progress" if task.jobStarted is None: task.jobStarted = now - task.jobCompleted = None # still running + + else: + # All waiting + task.status = "waiting" + task.jobStarted = None + task.jobCompleted = None task.updatedAt = now session.add(task) session.commit() + def finalize_subtask( + self, + subtask_id: UUID, + status: str, + outputs: Optional[Dict[str, Any]], + cloud_logs_url: Optional[str] + ): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + normalized = status.lower() + if normalized not in ("complete", "failed"): + raise ValueError("Status must be 'complete' or 'failed'") + + # Set outputs + if outputs is not None: + subtask.outputs = json.dumps(outputs) + + # Set logs + if cloud_logs_url is not None: + subtask.cloudLogsURL = cloud_logs_url + + # Status + timestamps + subtask.status = normalized + subtask.jobCompleted = now + subtask.updatedAt = now + + session.add(subtask) + session.commit() + + # Update parent task (complete/failed) + self._update_task_progress(session, subtask.taskId) + + session.refresh(subtask) + return subtask + # ============================================================ # Task Interface @@ -133,23 +246,24 @@ class TasksInterface: ): now = datetime.now(timezone.utc) - # Step 1: Create the task with 
get_db_session() as session: task = Task( taskSource=task_source, service=service, - jobStarted=now, + status="waiting", + jobStarted=None, + jobCompleted=None, ) session.add(task) session.commit() session.refresh(task) - # Step 2: Create first subtask using SubTaskInterface + # Create first subtask in waiting state subtask_interface = SubTaskInterface() subtask = subtask_interface.create_subtask( task_id=task.id, - inputs=inputs + inputs=inputs, ) return task.id, subtask.id @@ -176,5 +290,4 @@ class TasksInterface: session.add(task) session.commit() session.refresh(task) - return task diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py index d324f9ba..2a45a303 100644 --- a/backend/app/tasks/router.py +++ b/backend/app/tasks/router.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, Depends, HTTPException from uuid import UUID +import json # ← REQUIRED for json.loads from backend.app.dependencies import validate_token from backend.app.tasks.schema import ( @@ -7,9 +8,12 @@ from backend.app.tasks.schema import ( UpdateTaskStatusRequest, CreateSubTaskRequest, UpdateSubTaskStatusRequest, + FinalizeSubTaskRequest, ) +# Correct location of interfaces from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface + from backend.app.db.connection import get_db_session from backend.app.db.models.tasks import Task, SubTask from sqlmodel import select @@ -22,6 +26,9 @@ router = APIRouter( ) +# ============================================================ +# Create Task +# ============================================================ @router.post("/", summary="Create a new task and its first subtask") async def create_task(req: CreateTaskRequest): tasks = TasksInterface() @@ -33,6 +40,9 @@ async def create_task(req: CreateTaskRequest): return {"task_id": task_id, "subtask_id": subtask_id} +# ============================================================ +# Get Task + Subtasks +# ============================================================ 
@router.get("/{task_id}", summary="Get a task and its subtasks") async def get_task(task_id: UUID): with get_db_session() as session: @@ -44,12 +54,13 @@ async def get_task(task_id: UUID): select(SubTask).where(SubTask.taskId == task_id) ).all() - # Deserialize JSON inputs back to dict formatted = [] for st in subtasks: formatted.append({ **st.dict(), - "inputs": json.loads(st.inputs) if st.inputs else None + "inputs": json.loads(st.inputs) if st.inputs else None, + "outputs": json.loads(st.outputs) if st.outputs else None, + "cloud_logs_url": st.cloudLogsURL, }) return { @@ -58,6 +69,9 @@ async def get_task(task_id: UUID): } +# ============================================================ +# Update Task Status +# ============================================================ @router.put("/{task_id}/status", summary="Update a task's status") async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): tasks = TasksInterface() @@ -67,17 +81,24 @@ async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) + +# ============================================================ +# Create Additional Subtask +# ============================================================ @router.post("/{task_id}/subtasks", summary="Create a new subtask under a task") async def create_subtask(task_id: UUID, req: CreateSubTaskRequest): subtasks = SubTaskInterface() try: st = subtasks.create_subtask(task_id, req.inputs) - return {"subtask_id": st.id, "task_id": task_id} + return {"subtask_id": st.id, "task_id": task_id, "status": st.status} except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) -@router.put("/subtasks/{subtask_id}/status", summary="Update a subtask's status") +# ============================================================ +# Update Subtask Status +# ============================================================ +@router.put("/subtask/{subtask_id}/status", 
summary="Update a subtask's status") async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusRequest): subtasks = SubTaskInterface() try: @@ -85,3 +106,28 @@ async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusReques return {"subtask_id": st.id, "status": st.status} except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) + + +# === +# Sub task is complete +@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs") +async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): + subtasks = SubTaskInterface() + + try: + st = subtasks.finalize_subtask( + subtask_id=subtask_id, + status=req.status, + outputs=req.outputs, + cloud_logs_url=req.cloud_logs_url + ) + + return { + "subtask_id": st.id, + "status": st.status, + "outputs": req.outputs, + "cloud_logs_url": req.cloud_logs_url, + } + + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py index 66be61e7..b1a923b3 100644 --- a/backend/app/tasks/schema.py +++ b/backend/app/tasks/schema.py @@ -19,3 +19,8 @@ class CreateSubTaskRequest(BaseModel): class UpdateSubTaskStatusRequest(BaseModel): status: str + +class FinalizeSubTaskRequest(BaseModel): + status: str # "complete" or "failed" + outputs: Optional[Dict[str, Any]] = None + cloud_logs_url: Optional[str] = None \ No newline at end of file From d5b7fb21b3e405e20d05acf498d399f602a12bff Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 15:36:58 +0000 Subject: [PATCH 052/202] add this to dev so i can test --- backend/app/tasks/router.py | 56 +++++++++++++++++++++++++++++++++++++ backend/app/tasks/schema.py | 7 ++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py index 2a45a303..90b62dd1 100644 --- a/backend/app/tasks/router.py +++ b/backend/app/tasks/router.py @@ -9,6 +9,7 
@@ from backend.app.tasks.schema import ( CreateSubTaskRequest, UpdateSubTaskStatusRequest, FinalizeSubTaskRequest, + TaskSqsTriggerRequest ) # Correct location of interfaces @@ -131,3 +132,58 @@ async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) + + +# for testing: + +import boto3 +import json +from backend.app.tasks.schema import TaskSqsTriggerRequest +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface +from backend.app.config import get_settings + +sqs = boto3.client("sqs") + +@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202) +async def trigger_task(req: TaskSqsTriggerRequest): + """ + Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it. + If inputs are empty, automatically replaced with {}. + """ + + settings = get_settings() + + tasks = TasksInterface() + + # ---- Normalize empty inputs ---- + inputs = req.inputs or {} # ensures {} even if null + + # ---- 1. Create Task + SubTask ---- + task_id, subtask_id = tasks.create_task( + task_source=req.task_source, + service=req.service, + inputs=inputs, + ) + + # ---- 2. 
Prepare SQS payload ---- + sqs_payload = { + "subtask_id": str(subtask_id), + "params": inputs, + } + + try: + response = sqs.send_message( + QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/" + f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", + MessageBody=json.dumps(sqs_payload) + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"SQS error: {e}") + + return { + "message": "Task triggered", + "task_id": task_id, + "subtask_id": subtask_id, + "sqs_message_id": response.get("MessageId"), + "inputs_sent": inputs, + } \ No newline at end of file diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py index b1a923b3..a5b4424b 100644 --- a/backend/app/tasks/schema.py +++ b/backend/app/tasks/schema.py @@ -23,4 +23,9 @@ class UpdateSubTaskStatusRequest(BaseModel): class FinalizeSubTaskRequest(BaseModel): status: str # "complete" or "failed" outputs: Optional[Dict[str, Any]] = None - cloud_logs_url: Optional[str] = None \ No newline at end of file + cloud_logs_url: Optional[str] = None + +class TaskSqsTriggerRequest(BaseModel): + task_source: str + service: Optional[str] = None + inputs: Dict[str, Any] # forwarded into SubTask.inputs + SQS message \ No newline at end of file From 30b7370ede0fbac7b8a7e226acac40c0423d4626 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:32:12 +0000 Subject: [PATCH 053/202] tag numpy --- backend/app/requirements/requirements.txt | 1 + serverless.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 8a151e83..e850948d 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -6,6 +6,7 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 +numpy==1.26.4 # AWS boto3==1.35.44 # Data diff --git a/serverless.yml b/serverless.yml index 6eea03eb..b1186191 100644 --- a/serverless.yml +++ b/serverless.yml @@ -39,6 
+39,8 @@ custom: pythonRequirements: fileName: backend/app/requirements/requirements.txt dockerizePip: true + useDocker: false + enabled: false customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true From 56902d48ce2060054c4c0da2473f0001c03e6cb2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:32:59 +0000 Subject: [PATCH 054/202] tag numpy --- serverless.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/serverless.yml b/serverless.yml index b1186191..6eea03eb 100644 --- a/serverless.yml +++ b/serverless.yml @@ -39,8 +39,6 @@ custom: pythonRequirements: fileName: backend/app/requirements/requirements.txt dockerizePip: true - useDocker: false - enabled: false customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true From a7dd4b636fc7bdb572fb08052af870712b0e4ff7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:42:18 +0000 Subject: [PATCH 055/202] we like minimal --- backend/app/requirements/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index e850948d..7d5fb26b 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -6,7 +6,6 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 -numpy==1.26.4 # AWS boto3==1.35.44 # Data @@ -14,7 +13,6 @@ openpyxl==3.1.2 # Basic pytz uvicorn[standard] -pandas ipykernel sqlmodel From 4dc233e010944b1ad7402682edc231f469b5a945 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:43:26 +0000 Subject: [PATCH 056/202] more minmal --- .devcontainer/Dockerfile | 2 +- .devcontainer/requirements.txt | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 .devcontainer/requirements.txt diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index c061c9f8..56c366f4 100644 --- 
a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -29,7 +29,7 @@ ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 # Model # ADD asset_list/requirements.txt requirements.txt # FASTAPI backend -ADD backend/app/requirements/requirements.txt requirements.txt +ADD .devcontainer/requirements.txt requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt new file mode 100644 index 00000000..d8c51f19 --- /dev/null +++ b/.devcontainer/requirements.txt @@ -0,0 +1,17 @@ +# fastapi +fastapi==0.115.2 +sqlalchemy==2.0.36 +pydantic-settings==2.6.0 +psycopg2-binary==2.9.10 +python-jose==3.3.0 +cryptography==43.0.3 +mangum==0.19.0 +# AWS +boto3==1.35.44 +# Data +openpyxl==3.1.2 +# Basic +pytz +uvicorn[standard] +sqlmodel + From e73046b37b03e7f1169e2362dc16fc2fc267cdd8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:46:55 +0000 Subject: [PATCH 057/202] more minimal --- backend/app/requirements/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 7d5fb26b..dff7a546 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -12,7 +12,5 @@ boto3==1.35.44 openpyxl==3.1.2 # Basic pytz -uvicorn[standard] -ipykernel sqlmodel From 5d106a4f462c02e515c9d7c948540de8756f34cb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 16:53:52 +0000 Subject: [PATCH 058/202] pfp sal --- asset_list/app.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/asset_list/app.py b/asset_list/app.py index b832a3e8..ec47b07d 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,40 @@ def app(): Property UPRN """ + # PFP + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/Nov 2025 Inspections" + data_filename = "Inspections List - PFP 
(1).xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + address1_column = "Address 1" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["Address 1", "Address 2", "Address 3"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Archetype" # Using inspections + landlord_built_form = "Archetype 2" # Using inspections + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "UPRN" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # Stonewater Solar data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" From e13cc52c96131e685c6135408778121680a1b51b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 17:03:28 +0000 Subject: [PATCH 059/202] If a property is EPC D or above, and is private, it is NOT eligible for ECO4 funding --- recommendations/optimiser/funding_optimiser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 417363cd..855d7e5c 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -808,6 +808,9 @@ def _find_measure(input_measures, measure_type): def _make_solar_heating_funding_paths( p, input_measures, funding_paths, remaining_insulation_type, housing_type, funding: Funding ): + # If a property is 
private and EPC D or above, it's not eligible + if housing_type == "Private" and p.data["current-energy-rating"] in ["D", "C", "B", "A"]: + return funding_paths # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Solar PV with existing eligible heating system # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 3624b34dd0e497ae19e33eff0868a6c730e651d8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 21:10:21 +0000 Subject: [PATCH 060/202] handled electric community heating --- asset_list/utils.py | 128 ++++++++++++++++ backend/Property.py | 5 +- backend/tests/test_integration.py | 2 +- .../Nov 2025 Consulting Project/data_prep.py | 145 ++++++++++++++++++ etl/find_my_epc/RetrieveFindMyEpc.py | 6 +- 5 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py diff --git a/asset_list/utils.py b/asset_list/utils.py index fe2b7d14..c7d0cc0a 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -1,6 +1,8 @@ import time import random import pandas as pd + +from adhoc.investigation import newest_epc from backend.SearchEpc import SearchEpc from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from tqdm import tqdm @@ -9,6 +11,132 @@ from utils.logger import setup_logger logger = setup_logger() +def get_data_for_property( + address1: str, + postcode: str, + full_address: str, + property_type: [str | None], + built_form: [str | None], + uprn: [str | float | None], + epc_auth_token: str, + find_my_epc_return_page: bool +): + """ + Utility function that will fetch the data for a single property + :return: + """ + + if property_type == "block of flats": + return None + + house_number = str(address1).strip() + full_address = full_address.strip() + house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode) + if house_no is None: + house_no = house_number + + if pd.isnull(uprn): + uprn = None + + searcher = SearchEpc( + 
address1=str(house_no), + postcode=postcode, + auth_token=epc_auth_token, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5, + uprn=uprn + ) + # Force the skipping of estimating the EPC + # We check if the property was split + + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + searcher.find_property(skip_os=True) + + # Check if we have a flat or appartment + if searcher.newest_epc is None and uprn is None: + # Try again: + if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None: + # Backup + add1 = full_address.split(",") + if len(add1) > 1: + add1 = add1[1].strip() + else: + # Try splitting on space + add1 = full_address.split(" ")[0].strip() + else: + add1 = str(house_number) + searcher = SearchEpc( + address1=add1, + postcode=postcode, + auth_token=epc_auth_token, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5 + ) + + if ( + "flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in + house_number.lower() + ): + searcher.ordnance_survey_client.property_type = "Flat" + + searcher.find_property(skip_os=True) + + # As a final resort, we estimate the EPC + if property_type is not None and searcher.newest_epc is None: + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + return None + + # Retrieve data from FindMyEPC + try: + find_epc_searcher = RetrieveFindMyEpc( + address=searcher.newest_epc["address"], + postcode=searcher.newest_epc["postcode"] + ) + find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data( + return_page=find_my_epc_return_page + ) + + except ValueError as e: + if "No EPC found" in str(e) and "address1" in searcher.newest_epc: + try: + find_epc_searcher = RetrieveFindMyEpc( 
+ address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"] + ) + find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data() + except ValueError as e: + if "No EPC found" in str(e): + find_epc_response = ({}, None) if find_my_epc_return_page else ({}) + else: + logger.error(f"Error retrieving FindMyEPC data: {e}") + raise Exception(f"Error retrieving FindMyEPC data: {e}") + else: + find_epc_response = ({}, None) if find_my_epc_return_page else ({}) + except Exception as e: + raise Exception(f"Error retrieving FindMyEPC data: {e}") + + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + + find_my_epc_page = None + if find_my_epc_return_page: + find_my_epc_data, find_my_epc_page = find_epc_response + else: + find_my_epc_data = find_epc_response + + return newest_epc, older_epcs, find_my_epc_data, find_my_epc_page + + def get_data( df, manual_uprn_map, diff --git a/backend/Property.py b/backend/Property.py index 609a9d75..e5639aa2 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1221,11 +1221,12 @@ class Property: None: "Natural Gas (Community Scheme)", "mains gas": "Natural Gas (Community Scheme)", "biomass": "Smokeless Fuel", + "electricity": "Electricity" } if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] else: - raise Exception("Implement me") + raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") if self.hotwater["heater_type"] is not None: self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]] @@ -1247,7 +1248,7 @@ class Property: secondary_heating = self.data["secondheat-description"] self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"] else: - raise Exception("Investiage me") + raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}") else: self.hot_water_energy_source = 
hotwater_appliance_to_fuel[self.hotwater["appliance"]] diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index 1ba80223..eadd0788 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -91,7 +91,7 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ ].mean().reset_index() sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 10000).reset_index(drop=True) + 20000).reset_index(drop=True) # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py new file mode 100644 index 00000000..c68a0b58 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -0,0 +1,145 @@ +""" +This scipt prepares the raw data that was sent over by Peabody for production of +a standardised asset list + +They have sent over just short of 100,000 properties and so, to make this easier, we will do the following +1) Break the data up into subsets of 25,000 +2) Combine the data provided into a single list +""" +import json +import time +import os +import pandas as pd +from tqdm import tqdm +from dotenv import load_dotenv +from asset_list.utils import get_data_for_property +from utils.logger import setup_logger + +logger = setup_logger() + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +property_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) +sustainability_data = pd.read_excel( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# Basic overview: +# 1) We have 10,634 postcodes. If we needed to make requests to the ordnance survey API for +# all of these postcodes, it would cost at least £106, not accounting for double requests for postcodes +# where we have more than 100 properties (WE DONT!) +# 2) This is on average 9.36 properties per postcode +# 3) The UPRN in the property_list matches to the Org Ref in the sustainability data. These +# is an additional UPRN column in sustainability data which appears to be the ordnance survey UPRN +# 4) There appears to be some anomalous records, e.g. a flat with 543 m2 floor area and another flat +# with 6m2 floor area +# 5) Based on the residential indicator, all properties appear to be resi +# 6) We should do some quick calcs on how much it might cost to fetch all of the solar API data +# 7) We have 8785 missing UPRNS, which we should potentially try and fill +# 8) In the backend, we should probably start storing the raw EPC input data to allow for much quicker +# re-runs. All we really need to do is store the find my EPC data, perhaps against UPRN and RRN, as well +# as the raw EPC data, against uprn. This will be useful for scenario re-builds and will be much much +# quicker, as a starting point. Do we store in the database vs s3? 
TBC + +n_postcodes = property_list["Post Code"].nunique() +postcode_summary = property_list.groupby("Post Code")["UPRN"].count().reset_index() +postcode_summary["UPRN"].mean() + +test_match = property_list.merge(sustainability_data, left_on="UPRN", right_on="Org Ref") + + +def classify_floor_area(x): + if x <= 72: + return "0-72" + if x <= 97: + return "73-97" + if x <= 199: + return "98-199" + return "200+" + + +sustainability_data["Postal Region"] = sustainability_data["Postcode"].str.split(" ").str[0] +sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area (m2)"].apply( + lambda x: classify_floor_area(x) +) + +archetypes = sustainability_data[ + ["Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", + "Roof Construction", "Roof Insulation", "Floor Construction", "Floor Insulation", + "Glazing", "Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy", + "Floor Area Band"] +].drop_duplicates() + +# Maps the property types to the format recognised by the EPC api +property_type_map = {} +# Maps the build form to the format recognised by the OS api +built_form_map = {} + +# Proposed data fetching +# 1) grab propeties with UPRN and fetch the assocated EPC data & find my EPC data +# Some thoughts: +# S3 is quite cheap to query however we may incur some cost if we're making hundreds of thousands of calls +# to S3 to fetch data out of it. It's cheap to fetch data, if we aren't taking data out of S3, but we +# should consider this. This may influence whether or not we want to store each record individually +# against UPRN, or store against the 10,641 postcodes. We can fetch the data and store in a single +# large dump and then determine later if we want to split it up + +# TODO: Handle properties without uprn +# TODO: I think we can json dump all of this, but check if we can load and re-use the page source +# TODO: Create batches? 
+ +batch_size = 500 +batch_indexes = list(range(0, len(sustainability_data), batch_size)) + +# TODO: SET +working_directory = "" +download_contents = os.listdir(working_directory) + +for i in range(0, len(sustainability_data.standardised_asset_list), batch_size): + + batch_name = f"batch_{i}_to_{i + batch_size}" + # TODO: Check this + if batch_name in download_contents: + # Means we already have the data downloaded + continue + + batch_data = {} + for _, property_data in tqdm(sustainability_data.iterrows(), total=len(sustainability_data)): + os_uprn = property_data["UPRN"] + address1 = property_data["Address 1"] + postcode = property_data["Postcode"] + full_address_components = [ + x for x in [property_data["Address 1"], property_data["Address 2"], property_data["Address 3"]] + if not pd.isnull(x) + ] + full_address = ", ".join(full_address_components) + + fetched_data = get_data_for_property( + address1=address1, + postcode=postcode, + full_address=full_address, + property_type=property_type_map[property_data["Type"]], + built_form=built_form_map[property_data["Attachment"]], + uprn=property_data["UPRN"], + epc_auth_token=EPC_AUTH_TOKEN, + find_my_epc_return_page=True + ) + + batch_data[property_data["Org Ref"]] = fetched_data + + # TODO: We likely want to do something like this: to slow down + # TODO: We also perhaps store the data in batches + if len(batch_data) % 50 == 0 and len(batch_data) > 0: + logger.info("Sleeping for 10 seconds to avoid hitting API rate limit") + time.sleep(10) + + # Store the batch data in the wd + with open(os.path.join(working_directory, batch_name), "wb") as f: + json.dump(batch_data, f) diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index b8c24cb8..c9cca011 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -371,7 +371,7 @@ class RetrieveFindMyEpc: return all_find_my_epc_data - def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): + def 
retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False): """ For a post code and address, we pull out all the required data from the find my epc website """ @@ -577,6 +577,10 @@ class RetrieveFindMyEpc: **low_carbon_energy_sources, } + if return_page: + # We return the page text as well, which can be parsed again later + return resulting_data, postcode_response.text + return resulting_data def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None): From 005c6b844a32af75674cbe463fab8668cd2fd63d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 22:20:03 +0000 Subject: [PATCH 061/202] refactored the handling of dual heating recommendations and fixing coverage of heating types in property class --- backend/Property.py | 3 +- backend/tests/test_integration.py | 66 ++++++------- recommendations/HeatingRecommender.py | 131 +++++++++++++++++++++----- 3 files changed, 140 insertions(+), 60 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index e5639aa2..d0d85565 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1221,7 +1221,8 @@ class Property: None: "Natural Gas (Community Scheme)", "mains gas": "Natural Gas (Community Scheme)", "biomass": "Smokeless Fuel", - "electricity": "Electricity" + "electricity": "Electricity", + "biogas": "Smokeless Fuel", } if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index eadd0788..e8dda31d 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -1,36 +1,36 @@ -import ast -import json +# import ast +# import json from copy import deepcopy -from dataclasses import replace -from datetime import datetime +# from dataclasses import replace +# from datetime import datetime import random from tqdm import tqdm -import pandas as 
pd +# import pandas as pd import numpy as np from etl.epc.Record import EPCRecord -from backend.SearchEpc import SearchEpc -from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.orm import sessionmaker -from starlette.responses import Response +# from backend.SearchEpc import SearchEpc +# from sqlalchemy.exc import IntegrityError, OperationalError +# from sqlalchemy.orm import sessionmaker +# from starlette.responses import Response -from backend.app.config import get_settings, get_prediction_buckets -from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -from backend.app.db.functions.property_functions import ( - create_property, create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details -) -from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.models.portfolio import rating_lookup +# from backend.app.config import get_settings, get_prediction_buckets +# from backend.app.db.connection import db_engine +# from backend.app.db.functions.materials_functions import get_materials +# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations +# from backend.app.db.functions.property_functions import ( +# create_property, create_property_details_epc, create_property_targets, update_property_data, +# update_or_create_property_spatial_details +# ) +# from backend.app.db.functions.recommendations_functions import ( +# create_plan, upload_recommendations, create_scenario +# ) +# from backend.app.db.functions.funding_functions import upload_funding +# 
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +# from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -from backend.app.plan.utils import get_cleaned -from backend.app.utils import sap_to_epc +# from backend.app.plan.utils import get_cleaned +# from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi @@ -41,13 +41,13 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser import recommendations.optimiser.optimiser_functions as optimiser_functions from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 -from backend.ml_models.Valuation import PropertyValuation - -from etl.bill_savings.KwhData import KwhData -from etl.spatial.OpenUprnClient import OpenUprnClient -from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +# from utils.logger import setup_logger +# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 +# from backend.ml_models.Valuation import PropertyValuation +# +# from etl.bill_savings.KwhData import KwhData +# from etl.spatial.OpenUprnClient import OpenUprnClient +# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from backend.Funding import Funding from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths @@ -91,7 +91,7 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ ].mean().reset_index() sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 20000).reset_index(drop=True) + 3000).reset_index(drop=True) # TODO: In Property 
find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index d84a47b5..87311306 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -10,6 +10,9 @@ from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes from recommendations.HeatingControlRecommender import HeatingControlRecommender +from utils.logger import setup_logger + +logger = setup_logger() class HeatingRecommender: @@ -44,6 +47,22 @@ class HeatingRecommender: ] } }, + "Boiler and radiators, mains gas, electric underfloor heating": { + "boiler": { + "mainheating_description": "Boiler and radiators, mains gas, electric underfloor heating", + "recommendation_description": "Upgrade the existing boiler to a new, more efficient condensing " + "boiler. 
", + "controls_suffix": "Manual charge controls" + }, + # These are the heating types we need to produce a dual heating recommendation + "dual": { + "recommendation_description": "Upgrade the existing boiler to a new condensing boiler", + "types": [ + # type 1 + "boiler_upgrade", + ] + } + }, "Portable electric heaters assumed for most rooms, room heaters, electric": { "hhr": { "mainheating_description": "Electric storage heaters, radiators", @@ -127,7 +146,7 @@ class HeatingRecommender: n_trues += 1 if n_trues > 2 or n_trues == 0: - raise Exception("Implement me") + raise NotImplementedError("Implement me, zero or more than two heating systemss") if n_trues == 1: return False @@ -917,9 +936,11 @@ class HeatingRecommender: if self.property.main_heating_controls["clean_description"] != self.high_heat_retention_contols_desc: if self.dual_heating: - controls_prefix = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["hhr"]["controls_prefix"] + controls_prefix = self._map_dual_heating_description( + backup_map_to_description="current_controls", + output_type="controls_prefix", + recommendation_type="hhr" + ) if controls_prefix == "current_controls": description_prefix = self.property.main_heating_controls["clean_description"] @@ -951,9 +972,11 @@ class HeatingRecommender: # We check if the property has dual heating in place with a boiler and storage heaters if self.dual_heating: - new_heating_description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["hhr"]["mainheating_description"] + new_heating_description = self._map_dual_heating_description( + backup_map_to_description="Electric storage heaters", + output_type="mainheating_description", + recommendation_type="hhr" + ) new_hot_water_description = self.property.hotwater["clean_description"] # We keep the hot water system else: new_heating_description = "Electric storage heaters" @@ -1010,10 +1033,12 @@ class HeatingRecommender: 
product=hhrsh_product ) if self.dual_heating: - description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["hhr"]["recommendation_description"] - + description = self._map_dual_heating_description( + backup_map_to_description="Install high heat retention electric storage heaters with an appropriate " + "off-peak tariff.", + output_type="recommendation_description", + recommendation_type="hhr" + ) else: description = "Install high heat retention electric storage heaters with an appropriate off-peak tariff." @@ -1102,6 +1127,60 @@ class HeatingRecommender: return max(num_heated_rooms * 1.5, 6) + def _map_dual_heating_description( + self, backup_map_to_description, output_type, recommendation_type + ): + """ + Utility function to handle dual heating systems + :param backup_map_to_description: + :return: + """ + + if backup_map_to_description not in [ + # Recommendation descriptions - these are the textual descriptions shown in the front end + "Upgrade to a new condensing boiler.", + "Install high heat retention electric storage heaters with an appropriate off-peak tariff.", + # Simulation descriptions - this is the new EPC description we simulate with in the case + # of single heating + "Boiler and radiators, mains gas", + "Electric storage heaters", + # Suffixes allowed + "", + # Controls prefixes + "current_controls" + ]: + raise ValueError(f"Invalid backup_map_to_description, given {backup_map_to_description}") + + if output_type not in [ + "recommendation_description", + "mainheating_description", + "controls_suffix", + "controls_prefix", + ]: + raise ValueError(f"Invalid output_type, given {output_type}") + + if recommendation_type not in [ + "boiler", + ]: + raise ValueError(f"Given invalid recommendation type {recommendation_type}") + + # "Upgrade to a new condensing boiler." 
+ if self.dual_heating: + + # We check if we have a mapped description + if self.property.main_heating["clean_description"] not in self.DUAL_HEATING_DESCRIPTIONS: + logger.warning( + f"We have a dual heating system that hasn't been mapped, defaulting to single " + f"{self.property.main_heating['clean_description']}" + ) + return backup_map_to_description + + return self.DUAL_HEATING_DESCRIPTIONS[ + self.property.main_heating["clean_description"] + ][recommendation_type][output_type] + + return backup_map_to_description + def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters): """ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system @@ -1137,12 +1216,11 @@ class HeatingRecommender: if has_inefficient_space_heating or has_inefficient_water: - if self.dual_heating: - description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["boiler"]["recommendation_description"] - else: - description = "Upgrade to a new condensing boiler." 
+ description = self._map_dual_heating_description( + backup_map_to_description="Upgrade to a new condensing boiler.", + output_type="recommendation_description", + recommendation_type="boiler" + ) new_heating_eff = ( "Good" if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"] @@ -1167,13 +1245,12 @@ class HeatingRecommender: if system_change: # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation - if self.dual_heating: - new_heating_description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["boiler"]["mainheating_description"] - else: - new_heating_description = "Boiler and radiators, mains gas" + new_heating_description = self._map_dual_heating_description( + backup_map_to_description="Boiler and radiators, mains gas", + output_type="mainheating_description", + recommendation_type="boiler" + ) new_hotwater_description = "From main system" new_fuel_description = "mains gas (not community)" @@ -1239,9 +1316,11 @@ class HeatingRecommender: # If the property did not previously have a boiler, we combine controls_recommender = HeatingControlRecommender(self.property) if self.dual_heating: - description_suffix = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["boiler"]["controls_suffix"] + description_suffix = self._map_dual_heating_description( + backup_map_to_description="", + output_type="controls_suffix", + recommendation_type="boiler" + ) else: description_suffix = "" controls_recommender.recommend( From 9f457d24d26ed7ac53a03734c5764370daf90a9b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 22:44:14 +0000 Subject: [PATCH 062/202] handling edge case of community hot water, non-community heating --- backend/app/assumptions.py | 1 + backend/ml_models/AnnualBillSavings.py | 3 ++- recommendations/HeatingRecommender.py | 1 + recommendations/Recommendations.py | 15 
++++++++++++--- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 66660e06..32d63a95 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -80,6 +80,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { }, "Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1}, "Ground source heat pump, warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Room heaters, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85} } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 4291b1d1..243cae52 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -263,7 +263,8 @@ class AnnualBillSavings: if fuel == "Electricity": return (kwh / cop) * cls.ELECTRICITY_PRICE_CAP - if fuel in ["Natural Gas", "Natural Gas (Community Scheme)"]: + # We handle "Unmapped" in a similar fashion to gas + if fuel in ["Natural Gas", "Natural Gas (Community Scheme)", "Unmapped"]: return (kwh / cop) * cls.GAS_PRICE_CAP if fuel == "LPG": diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 87311306..fdd4376d 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1161,6 +1161,7 @@ class HeatingRecommender: if recommendation_type not in [ "boiler", + "hhr", ]: raise ValueError(f"Given invalid recommendation type {recommendation_type}") diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index f2dc5804..cc5b7895 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -681,7 +681,9 @@ class Recommendations: ): # Handle the case of community schemes - if (heating_description == "Community scheme") or 
(hotwater_description == "Community scheme"): + if (heating_description == "Community scheme") or (hotwater_description == "Community scheme") and ( + "not community" not in main_fuel_description + ): if main_fuel_description in ["mains gas (community)", "UNKNOWN"]: return { "heating_fuel_type": "Natural Gas (Community Scheme)", @@ -702,7 +704,7 @@ class Recommendations: if hotwater_description in [ "From main system", "From main system, no cylinder thermostat", - 'From main system, waste water heat recovery' + 'From main system, waste water heat recovery', ]: return { "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel, @@ -718,7 +720,14 @@ class Recommendations: "heating_cop": mapped["cop"], "hotwater_cop": 1 } - mapped_hotwater = descriptions_to_fuel_types[hotwater_description] + mapped_hotwater = descriptions_to_fuel_types.get(hotwater_description) + if mapped_hotwater is None: + # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over + # fairly regularly. A task has been added to planner to refactor this + # We have observed an edge case where the fuel is described as not being community + # but the hot water is. 
We handle as such + logger.warning("Hot water description not mapped: %s", heating_description) + mapped_hotwater = {"fuel": 'Unmapped', "cop": 0.9} return { "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"], From 070d7d332c85e21fdf3588c245935f8ac52d02b3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 22:51:45 +0000 Subject: [PATCH 063/202] added backup handling for another fuel edge case --- recommendations/Recommendations.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index cc5b7895..bcb697fc 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -691,7 +691,25 @@ class Recommendations: "heating_cop": 1, "hotwater_cop": 1 } - raise NotImplementedError("Handle this case") + if main_fuel_description in ['biogas (community)']: + return { + "heating_fuel_type": "Smokeless Fuel", + "hotwater_fuel_type": "Smokeless Fuel", + "heating_cop": 0.85, + "hotwater_cop": 0.85 + } + logger.warning( + "Unhandled community fuel." 
+ f"Fuel: {main_fuel_description}" + f"Heating: {heating_description}" + f"Heating: {hotwater_description}" + ) + return { + "heating_fuel_type": "Unmapped", + "hotwater_fuel_type": "Unmapped", + "heating_cop": 0.9, + "hotwater_cop": 0.9 + } mapped = descriptions_to_fuel_types.get(heating_description, None) if mapped is None: From 0170272abd32b05da47bfe77bbce7fd2024439e2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 22:58:16 +0000 Subject: [PATCH 064/202] added eco packages to integration test --- backend/app/assumptions.py | 3 +- backend/tests/test_integration.py | 293 +++++++++++++++++++++++++++--- 2 files changed, 268 insertions(+), 28 deletions(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 32d63a95..0172466e 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -80,7 +80,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { }, "Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1}, "Ground source heat pump, warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, - "Room heaters, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85} + "Room heaters, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85}, + "Water source heat pump, radiators, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index e8dda31d..f0c53f16 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -302,6 +302,11 @@ body = PlanTriggerRequest( 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} ) +eco_packages = {} +# For testing +for p in input_properties: + eco_packages[p.id] = (None, None, None) + for p in tqdm(input_properties): if not 
recommendations.get(p.id): continue @@ -327,16 +332,16 @@ for p in tqdm(input_properties): fixed_gain = optimiser_functions.calculate_fixed_gain( property_required_measures, recommendations, p, needs_ventilation ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) + gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) funding = Funding( - tenure="Social", + tenure=body.housing_type, project_scores_matrix=project_scores_matrix, partial_project_scores_matrix=partial_project_scores_matrix, whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=12.5, + eco4_social_cavity_abs_rate=13, eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=12.5, + eco4_private_cavity_abs_rate=13, eco4_private_solid_abs_rate=17, gbis_social_cavity_abs_rate=21, gbis_social_solid_abs_rate=25, @@ -380,7 +385,7 @@ for p in tqdm(input_properties): r["uplift_project_score"] ) = funding.get_innovation_uplift( measure=r, - starting_sap=p.data["current-energy-efficiency"], + starting_sap=int(p.data["current-energy-efficiency"]), floor_area=p.floor_area, is_cavity=p.walls["is_cavity_wall"], current_wall_uvalue=current_wall_u_value, @@ -391,8 +396,16 @@ for p in tqdm(input_properties): mainheat_energy_eff=p.data["mainheat-energy-eff"], ) + if r["already_installed"]: + # if already installed, we zero out the uplift and funding + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = ( + 0, 0, 0, 0 + ) + input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True + measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True, + property_eco_packages=eco_packages.get(p.id) ) # When the goal is Increasing EPC, we can run the funding optimiser @@ -404,20 +417,14 @@ for p in tqdm(input_properties): housing_type=body.housing_type, 
budget=body.budget, target_gain=gain, - funding=funding + funding=funding, + work_package=eco_packages[p.id][2] ) - # Given the solutions we select the optimal one - solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] - ) - - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) - solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) + # If the solution isn't eligible, we can't really consider it + solutions = solutions[ + (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") + ] if solutions["meets_upgrade_target"].any(): # If we have a solution that meets the upgrade target, we select that one @@ -428,9 +435,13 @@ for p in tqdm(input_properties): # This is the list of measures that we will recommend scheme = optimal_solution["scheme"] - funded_measures = optimal_solution["items"] if scheme != "none" else [] - solution = optimal_solution["items"] + optimal_solution["unfunded_items"] - # This is the total amount of funding that the project will produce (including uplifts) (£) + + # We create this full list of selected measures, which is used in the next section for setting + # default measures + solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) + funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] + + # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ optimal_solution["partial_project_funding"] # This is the total amount of funding associated to the uplift (£) @@ -470,8 +481,8 @@ for p in 
tqdm(input_properties): funding.check_funding( measures=solution, - starting_sap=p.data["current-energy-efficiency"], - ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), + starting_sap=int(p.data["current-energy-efficiency"]), + ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]), floor_area=p.floor_area, mainheat_description=p.main_heating["clean_description"], heating_control_description=p.main_heating_controls["clean_description"], @@ -510,10 +521,10 @@ for p in tqdm(input_properties): # Add best practice measures (ventilation/trickle vents) selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) - # Final flattening - Don't do this! - # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - # p.id, recommendations, selected - # ) + # Final flattening + recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( + p.id, recommendations, selected + ) # TODO: functionise for measure in funded_measures: @@ -529,3 +540,231 @@ for p in tqdm(input_properties): partial_project_score=partial_project_score, uplift_project_score=uplift_project_score ) + +# for p in tqdm(input_properties): +# if not recommendations.get(p.id): +# continue +# +# # we need to double unlist because we have a list of lists +# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} +# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] +# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] +# +# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore +# # its inclusion +# needs_ventilation = any( +# x in property_measure_types for x in assumptions.measures_needing_ventilation +# ) and not p.has_ventilation +# +# if not measures_to_optimise: +# # 
Nothing to do, we just reshape the recommendations +# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# p.id, recommendations, set() +# ) +# continue +# +# fixed_gain = optimiser_functions.calculate_fixed_gain( +# property_required_measures, recommendations, p, needs_ventilation +# ) +# gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) +# +# funding = Funding( +# tenure="Social", +# project_scores_matrix=project_scores_matrix, +# partial_project_scores_matrix=partial_project_scores_matrix, +# whlg_eligible_postcodes=whlg_eligible_postcodes, +# eco4_social_cavity_abs_rate=12.5, +# eco4_social_solid_abs_rate=17, +# eco4_private_cavity_abs_rate=12.5, +# eco4_private_solid_abs_rate=17, +# gbis_social_cavity_abs_rate=21, +# gbis_social_solid_abs_rate=25, +# gbis_private_cavity_abs_rate=21, +# gbis_private_solid_abs_rate=28, +# ) +# +# li_thickness = convert_thickness_to_numeric( +# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] +# ) +# current_wall_u_value = p.walls["thermal_transmittance"] +# if current_wall_u_value is None: +# current_wall_u_value = get_wall_u_value( +# clean_description=p.walls["clean_description"], +# age_band=p.age_band, +# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], +# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], +# ) +# +# # We insert the innovation uplift +# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) +# +# # TODO: Turn this into a function and store the innovaiton uplift +# for group in measures_to_optimise_with_uplift: +# for r in group: +# +# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", +# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: +# ( +# r["partial_project_score"], +# r["partial_project_funding"], +# r["innovation_uplift"], +# r["uplift_project_score"], +# ) = ( +# 0, 0, 0, 0 +# ) +# continue +# +# ( +# 
r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], +# r["uplift_project_score"] +# ) = funding.get_innovation_uplift( +# measure=r, +# starting_sap=p.data["current-energy-efficiency"], +# floor_area=p.floor_area, +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# ) +# +# input_measures = optimiser_functions.prepare_input_measures( +# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True +# ) +# +# # When the goal is Increasing EPC, we can run the funding optimiser +# if body.goal == "Increasing EPC": +# +# solutions = optimise_with_funding_paths( +# p=p, +# input_measures=input_measures, +# housing_type=body.housing_type, +# budget=body.budget, +# target_gain=gain, +# funding=funding +# ) +# +# # Given the solutions we select the optimal one +# solutions["cost_less_full_project_funding"] = np.where( +# solutions["scheme"] == "eco4", +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], +# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] +# ) +# +# solutions["cost_less_full_project_funding"] = ( +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] +# ) +# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) +# +# if solutions["meets_upgrade_target"].any(): +# # If we have a solution that meets the upgrade target, we select that one +# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] +# else: +# # Pick the cheapest +# optimal_solution = solutions.iloc[0] +# +# # This is the list of measures that we will recommend +# scheme = optimal_solution["scheme"] +# funded_measures = optimal_solution["items"] 
if scheme != "none" else [] +# solution = optimal_solution["items"] + optimal_solution["unfunded_items"] +# # This is the total amount of funding that the project will produce (including uplifts) (£) +# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ +# optimal_solution["partial_project_funding"] +# # This is the total amount of funding associated to the uplift (£) +# total_uplift = optimal_solution["total_uplift"] +# # This is the funding scheme selected +# # This is the full project ABS +# full_project_score = optimal_solution["project_score"] +# # This is the partial project ABS +# partial_project_score = optimal_solution["partial_project_score"] +# # This is the uplift score ABS +# uplift_project_score = optimal_solution["total_uplift_score"] +# else: +# # We optimise and then we determine eligibility for funding, based on the measures selected +# optimiser = ( +# GainOptimiser( +# input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False +# ) if body.budget else CostOptimiser(input_measures, min_gain=gain) +# ) +# optimiser.setup() +# optimiser.solve() +# solution = optimiser.solution +# +# recommendation_types = [] +# for measures in input_measures: +# for measure in measures: +# recommendation_types.append(measure["type"]) +# recommendation_types = set(recommendation_types) +# +# has_wall_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# WALL_INSULATION_MEASURES +# ) +# has_roof_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# ROOF_INSULATION_MEASURES +# ) +# +# funding.check_funding( +# measures=solution, +# starting_sap=p.data["current-energy-efficiency"], +# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), +# floor_area=p.floor_area, +# mainheat_description=p.main_heating["clean_description"], +# 
heating_control_description=p.main_heating_controls["clean_description"], +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# has_wall_insulation_recommendation=has_wall_insulation_recommendation, +# has_roof_insulation_recommendation=has_roof_insulation_recommendation, +# ) +# +# # Determine the scheme +# scheme = "none" +# if funding.eco4_eligible: +# scheme = "eco4" +# if scheme == "none" and funding.gbis_eligible: +# scheme = "gbis" +# +# funded_measures = solution if scheme in ["gbis", "eco4"] else [] +# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs +# total_uplift = funding.eco4_uplift +# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs +# partial_project_score = funding.partial_project_abs +# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift +# +# selected = {r["id"] for r in solution} +# +# if property_required_measures: +# solution = optimiser_functions.add_required_measures( +# property_id=p.id, property_required_measures=property_required_measures, +# recommendations=recommendations, selected=selected, +# ) +# +# # Add best practice measures (ventilation/trickle vents) +# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) +# # Final flattening - Don't do this! 
+# # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# # p.id, recommendations, selected +# # ) +# +# # TODO: functionise +# for measure in funded_measures: +# if "+mechanical_ventilation" in measure["type"]: +# measure["type"] = measure["type"].split("+mechanical_ventilation")[0] +# +# p.insert_funding( +# scheme=scheme, +# funded_measures=funded_measures, +# project_funding=project_funding, +# total_uplift=total_uplift, +# full_project_score=full_project_score, +# partial_project_score=partial_project_score, +# uplift_project_score=uplift_project_score +# ) From 1458cff95cd5c9b94bc049d96ddf9432070f2764 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 23:04:10 +0000 Subject: [PATCH 065/202] handle empty pps matrix and low C and above for ewi, iwi and cwi --- backend/Funding.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/Funding.py b/backend/Funding.py index be3db0d4..bd4b2edf 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -542,6 +542,8 @@ class Funding: pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid IWI category: {measure_code}") return pps.squeeze()["Cost Savings"] @@ -554,6 +556,8 @@ class Funding: pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid EWI category: {measure_code}") return pps.squeeze()["Cost Savings"] @@ -562,6 +566,8 @@ class Funding: pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid 
CWI category: {measure_code}") return pps.squeeze()["Cost Savings"] From 4d6a73fd7b2e8d0a30b37d2f2f0c4438e86d1d9a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 23:43:07 +0000 Subject: [PATCH 066/202] handling more fuel types --- backend/app/assumptions.py | 3 +++ backend/ml_models/AnnualBillSavings.py | 2 +- backend/tests/test_integration.py | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 0172466e..924bd2a0 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -82,6 +82,9 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Ground source heat pump, warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Room heaters, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85}, "Water source heat pump, radiators, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Air source heat pump, Systems with radiators, electric": {"fuel": "Electricity", + "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 243cae52..4a2a6f1f 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -286,7 +286,7 @@ class AnnualBillSavings: # The solar thermal covers a % of the heating kwh, so we need to adjust the cost return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP - if fuel == "Electricity + Solar Thermal": + if fuel in ["Electricity + Solar Thermal", 'Unmapped + Solar Thermal']: # The solar thermal covers a % of the heating kwh, so we need to adjust the cost return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * 
cls.ELECTRICITY_PRICE_CAP diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index f0c53f16..ac61a09c 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -90,8 +90,10 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] ].mean().reset_index() +epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] + sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 3000).reset_index(drop=True) + 5000).reset_index(drop=True) # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used From 3acad1e04962d2d3d554ac6deb90e5430eec1c25 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 14 Nov 2025 23:47:56 +0000 Subject: [PATCH 067/202] handle rir pps filtering --- backend/Funding.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/Funding.py b/backend/Funding.py index bd4b2edf..fe5165f6 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -603,6 +603,8 @@ class Funding: code = "RIRI_res_unin" pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid RIRI category: {code}") return pps.squeeze()["Cost Savings"] From ab9b0b16f9245dfdedaec31df4a40d52883b978e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 15 Nov 2025 00:30:36 +0000 Subject: [PATCH 068/202] handling more fuel types --- backend/app/assumptions.py | 1 + recommendations/Recommendations.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/app/assumptions.py 
b/backend/app/assumptions.py index 924bd2a0..bc8f9cc6 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -85,6 +85,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Air source heat pump, Systems with radiators, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Electric ceiling heating": {"fuel": "Electricity", "cop": 1}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index bcb697fc..20f0c760 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -711,7 +711,7 @@ class Recommendations: "hotwater_cop": 0.9 } - mapped = descriptions_to_fuel_types.get(heating_description, None) + mapped = descriptions_to_fuel_types.get(heating_description.strip(), None) if mapped is None: # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over # fairly regularly. A task has been added to planner to refactor this @@ -738,7 +738,7 @@ class Recommendations: "heating_cop": mapped["cop"], "hotwater_cop": 1 } - mapped_hotwater = descriptions_to_fuel_types.get(hotwater_description) + mapped_hotwater = descriptions_to_fuel_types.get(hotwater_description.strip()) if mapped_hotwater is None: # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over # fairly regularly. 
A task has been added to planner to refactor this From f6ff0379942606fab5eae4418723028d7e26f13c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 16 Nov 2025 16:26:23 +0800 Subject: [PATCH 069/202] temp clearing endpoint --- backend/app/whlg/router.py | 63 +++++++++++++++++++------------------- recommendations/Costs.py | 2 ++ 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/backend/app/whlg/router.py b/backend/app/whlg/router.py index 3957a3f4..eaaffb41 100644 --- a/backend/app/whlg/router.py +++ b/backend/app/whlg/router.py @@ -1,31 +1,30 @@ -import boto3 -import json -import math -import asyncio -import random - -from datetime import datetime +# import boto3 +# import json +# import math +# import asyncio +# import random +# +# from datetime import datetime from fastapi import APIRouter, Depends from backend.app.dependencies import validate_token -from backend.app.plan.schemas import PlanTriggerRequest +# from backend.app.plan.schemas import PlanTriggerRequest from backend.app.config import get_settings -from sqlalchemy.orm import sessionmaker +# from sqlalchemy.orm import sessionmaker from utils.logger import setup_logger -from backend.app.db.connection import db_engine -from backend.app.db.functions.recommendations_functions import create_scenario -import pandas as pd +# from backend.app.db.connection import db_engine +# from backend.app.db.functions.recommendations_functions import create_scenario +# import pandas as pd from backend.app.whlg.schema import WHLGElligibilityRequest -from utils.s3 import read_csv_from_s3 -from sqlalchemy.dialects.postgresql import insert -from backend.app.db.connection import get_db_session -from backend.app.db.models.whlg import Whlg -from backend.app.db.functions.whlg_functions import upsert_whlg_postcode +# from utils.s3 import read_csv_from_s3 +# from sqlalchemy.dialects.postgresql import insert +# from backend.app.db.connection import get_db_session +# from backend.app.db.models.whlg import Whlg 
+# from backend.app.db.functions.whlg_functions import upsert_whlg_postcode logger = setup_logger() - if get_settings().ENVIRONMENT == "local": router = APIRouter( prefix="/whlg", @@ -40,6 +39,7 @@ else: responses={404: {"description": "Not found"}} ) + @router.get("/") async def whlg_entrypoint(): # body needs to include postcode, UPRN [task ID?] @@ -62,17 +62,16 @@ async def whlg_entrypoint(): @router.post("/eligible") async def eligiable(body: WHLGElligibilityRequest): - postcode = body.postcode or "" - postcode = postcode.lower().replace(" ", "") - - whlg_eligible_postcodes = read_csv_from_s3( - bucket_name=get_settings().DATA_BUCKET, - filepath="funding/whlg eligible postcodes.csv", - ) - whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) - whlg_eligible_postcodes['Postcode'] = whlg_eligible_postcodes['Postcode'].str.replace(' ', '', regex=False) - - is_eligible = postcode in whlg_eligible_postcodes['Postcode'].values - return {"whlg_eligible": is_eligible} - - + # postcode = body.postcode or "" + # postcode = postcode.lower().replace(" ", "") + # + # whlg_eligible_postcodes = read_csv_from_s3( + # bucket_name=get_settings().DATA_BUCKET, + # filepath="funding/whlg eligible postcodes.csv", + # ) + # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + # whlg_eligible_postcodes['Postcode'] = whlg_eligible_postcodes['Postcode'].str.replace(' ', '', regex=False) + # + # is_eligible = postcode in whlg_eligible_postcodes['Postcode'].values + # return {"whlg_eligible": is_eligible} + return None diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 33d7b061..1184d5ed 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -751,7 +751,9 @@ class Costs: # Adjust total radiator needs based on built form form_factor = { + 'Enclosed Mid-Terrace': 0.9, 'Mid-Terrace': 0.95, + 'Enclosed End-Terrace': 0.95, 'Semi-Detached': 1.05, 'Detached': 1.25, 'End-Terrace': 1.05 From 756bf4b720f4c8184bedb707f9b8dfddb408610c Mon 
Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 24 Nov 2025 12:18:01 +0000 Subject: [PATCH 070/202] handled not recorded different format --- backend/tests/test_integration.py | 2 +- etl/epc/settings.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index ac61a09c..45dd109a 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -93,7 +93,7 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 5000).reset_index(drop=True) + 10000).reset_index(drop=True) # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used diff --git a/etl/epc/settings.py b/etl/epc/settings.py index 47a75def..16619fa2 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -20,6 +20,7 @@ DATA_ANOMALY_MATCHES = { # certificate retrieval process is successfully completed. Mandatory data items cannot be applied # retrospectively to energy certificates lodged before the date of the change. "Not recorded", + "Not Recorded", # The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a # ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data, # the opportunity to comply with the regulations. 
From April 2011 the ability to lodge a ‘default’ DEC was no From caeb776428db6dba12b475a2f40750eb167b25cb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 25 Nov 2025 12:15:06 +0000 Subject: [PATCH 071/202] started work on peabody --- asset_list/utils.py | 1 - backend/app/config.py | 4 +- backend/app/db/connection.py | 1 + backend/app/plan/utils.py | 2 +- backend/engine/engine.py | 37 ++++++++++++------- .../Nov 2025 Consulting Project/data_prep.py | 8 ++++ etl/find_my_epc/RetrieveFindMyEpc.py | 1 + 7 files changed, 37 insertions(+), 17 deletions(-) diff --git a/asset_list/utils.py b/asset_list/utils.py index c7d0cc0a..8746c03a 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -2,7 +2,6 @@ import time import random import pandas as pd -from adhoc.investigation import newest_epc from backend.SearchEpc import SearchEpc from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from tqdm import tqdm diff --git a/backend/app/config.py b/backend/app/config.py index 98e1c447..dd3f5db1 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -3,7 +3,6 @@ from pydantic_settings import BaseSettings from typing import Optional - class Settings(BaseSettings): API_KEY: str API_KEY_NAME: str = "X-API-KEY" @@ -43,7 +42,8 @@ class Settings(BaseSettings): AWS_DEFAULT_REGION: Optional[str] = None class Config: - env_file = "backend.env" + env_file = "backend/.env" + @lru_cache() def get_settings(): diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index fbec9102..2ac9bd02 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -14,6 +14,7 @@ db_string = connection_string.format( db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + def get_db_session(): if db_engine is None: raise RuntimeError("Database is not configured. 
Set DATABASE_URL in environment variables.") diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 67b7bce1..ea328d5b 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -64,7 +64,7 @@ def extract_property_request_data( x for x in already_installed if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) ), []) - + # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 1cd379b9..6e90a297 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -392,6 +392,26 @@ def parse_heating_system(config): return None +def check_duplicate_uprns(plan_input): + """ + Simple function to check if the input data contains duplicated UPRNS. + If there are duplicates, an exception will be rasied + :return: + """ + # Check for duplicate UPRNS + input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] + + if input_uprns: + # Check for dupes + if len(input_uprns) != len(set(input_uprns)): + # Find the duplicate UPRNs + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") + + return True + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) @@ -480,16 +500,8 @@ async def model_engine(body: PlanTriggerRequest): if body.index_start is not None and body.index_end is not None: plan_input = plan_input[body.index_start:body.index_end] - # Check for duplicate UPRNS - input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] - - if input_uprns: - # Check for dupes - if len(input_uprns) != len(set(input_uprns)): - # Find the 
duplicate UPRNs - duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) - # de-dupe input_uprns - raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") + # Confirm no duplicate UPRNS + check_duplicate_uprns(plan_input) # If we have patches or overrides, we should read them in here patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body) @@ -528,9 +540,7 @@ async def model_engine(body: PlanTriggerRequest): if (body.event_type == "remote_assessment") and config.get("property_type") == "Flat": # We're running a remote assessment for a flat - we go and grab the associated # UPRNS for other units in the same building - associated_uprns = get_associated_uprns( - session, postcode=config["postcode"], uprn=uprn - ) + associated_uprns = get_associated_uprns(session, postcode=config["postcode"], uprn=uprn) epc_searcher = SearchEpc( address1=address1, @@ -1140,6 +1150,7 @@ async def model_engine(body: PlanTriggerRequest): ) property_value_increase_ranges[p.id] = valuations + # TODO - this is not right, especially if the existing run failed if p.is_new: property_details_epc = p.get_property_details_epc( portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index c68a0b58..6dd71b98 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -77,6 +77,14 @@ archetypes = sustainability_data[ "Floor Area Band"] ].drop_duplicates() +# Potential reductions: +# 1) Split roof insulation into > 100mm loft and <= 100mm loft +# 2) Group all of the glazed together (e.g. double glazed, secondary glazed, triple glazed) +# 3) Group up boiler efficiency A-C, D - F, G? or someting like this +# 4) Group up main fuel into gas, electric, oil, other? 
+# 5) Wall Construction - group up Sandstone and Granite into one category +# 6) Reduce or remove floor construction + # Maps the property types to the format recognised by the EPC api property_type_map = {} # Maps the build form to the format recognised by the OS api diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index c9cca011..c57f9ca8 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -398,6 +398,7 @@ class RetrieveFindMyEpc: extracted_address_cleaned = ( extracted_address.replace(",", "").replace(" ", "").lower() ) + if not extracted_address_cleaned.startswith(self.address_cleaned): continue From 7fde580b376e51637df422f60920a9dcfdbe4aca Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 25 Nov 2025 13:43:36 +0000 Subject: [PATCH 072/202] set up epc model and already installed model --- backend/app/db/models/epc.py | 29 +++++++++++++++++++++++++++++ backend/app/db/models/portfolio.py | 16 ++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 backend/app/db/models/epc.py diff --git a/backend/app/db/models/epc.py b/backend/app/db/models/epc.py new file mode 100644 index 00000000..5a216040 --- /dev/null +++ b/backend/app/db/models/epc.py @@ -0,0 +1,29 @@ +from sqlalchemy import ( + Column, + Integer, + String, + JSON, + TIMESTAMP, + UniqueConstraint, +) +from sqlalchemy.orm import declarative_base + +Base = declarative_base() + + +class EpcStore(Base): + """ + Stores EPC data retrieved from the EPC API and EPC web pages. 
+ """ + __tablename__ = "epc_store" + + id = Column(Integer, primary_key=True, autoincrement=True) + uprn = Column(Integer) + epc_api_created_at = Column(TIMESTAMP(timezone=False)) + epc_api = Column(JSON, nullable=False) + epc_page_created_at = Column(TIMESTAMP(timezone=False)) + epc_page = Column(String) + epc_page_rrn = Column(String) + + def __repr__(self): + return f"" diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 953e7b3d..fbe9661b 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -4,6 +4,7 @@ import datetime from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint from sqlalchemy.ext.declarative import declarative_base from backend.app.db.models.users import UserModel # noqa +from backend.app.db.models.materials import MaterialType Base = declarative_base() @@ -225,3 +226,18 @@ class PortfolioUsers(Base): role = Column(Text, nullable=False) created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + + +class PropertyInstalledMeasures(Base): + """ + This model keeps a record of the installed measures for each property, at the UPRN level + """ + __tablename__ = 'property_installed_measures' + id = Column(Integer, primary_key=True, autoincrement=True) + uprn = Column(Integer, nullable=False) + measure_type = Column( + Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), + nullable=False + ) + created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) From 7640baec02a1fe7084eb383811e4c26b695e06c3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 25 Nov 2025 18:22:20 +0000 Subject: [PATCH 073/202] implemented EPC caching logic --- 
backend/SearchEpc.py | 11 +- backend/app/db/functions/__init__.py | 12 +++ backend/app/db/functions/epc_functions.py | 125 ++++++++++++++++++++++ backend/engine/engine.py | 46 ++++++-- etl/find_my_epc/RetrieveFindMyEpc.py | 49 +++++++-- 5 files changed, 223 insertions(+), 20 deletions(-) create mode 100644 backend/app/db/functions/__init__.py create mode 100644 backend/app/db/functions/epc_functions.py diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index eb2b0b23..c47e82c4 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -917,7 +917,7 @@ class SearchEpc: return agg[key].values[0] - def find_property(self, skip_os=False): + def find_property(self, skip_os=False, api_data=None): """ This method will attempt to identify a property. It will, at first, use the EPC api to try and find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to @@ -928,10 +928,17 @@ class SearchEpc: as a final check to see if there is any EPC data. 
If there is no EPC data, the epc data will be estimated based on the surrounding properties + + :param skip_os: If True, the ordnance survey api will be skipped and only the EPC api will be used + :param api_data: If provided, this data will be used instead of querying the EPC api """ # Step 1: use the epc api to find the property and uprn - response = self.get_epc() + if api_data: + self.data = api_data + response = {"status": 200} + else: + response = self.get_epc() if response["status"] == 200: ( diff --git a/backend/app/db/functions/__init__.py b/backend/app/db/functions/__init__.py new file mode 100644 index 00000000..0f239d6e --- /dev/null +++ b/backend/app/db/functions/__init__.py @@ -0,0 +1,12 @@ +from .epc_functions import * +from .address_functions import * +from .portfolio_functions import * +from .energy_assessment_functions import * +from .property_functions import * +from .recommendations_functions import * +from .solar_functions import * +from .funding_functions import * +from .materials_functions import * +from .inspections_functions import * +from .non_intrusive_surveys import * +from .whlg_functions import * diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py new file mode 100644 index 00000000..4b675f1f --- /dev/null +++ b/backend/app/db/functions/epc_functions.py @@ -0,0 +1,125 @@ +from datetime import datetime, timedelta, timezone +from sqlalchemy.orm import Session +from sqlalchemy.exc import SQLAlchemyError +from backend.app.db.models.epc import EpcStore + + +class EpcStoreService: + """ + Service layer for EPC data lookup and persistence. 
+ """ + + FRESHNESS_DAYS = 30 + + # status labels + FRESH = "fresh" + EXPIRED = "expired" + MISSING = "missing" + + @classmethod + def get_epc_for_uprn(cls, session: Session, uprn: int): + """ + Query EPC data for a given UPRN and return a dict describing: + - epc_api: only if within last 30 days + - epc_page: only if epc_api exists + - status: 'fresh', 'expired', or 'missing' + """ + + record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first() + + if not record: + return {"status": cls.MISSING, "epc_api": None, "epc_page": None} + + if not record.epc_api_created_at: + # API data missing → treat as missing even if page data exists + return {"status": cls.MISSING, "epc_api": None, "epc_page": None} + + # check freshness + cutoff = datetime.now(timezone.utc) - timedelta(days=EpcStoreService.FRESHNESS_DAYS) + + if record.epc_api_created_at.date() < cutoff.date(): + return {"status": cls.EXPIRED, "epc_api": None, "epc_page": None} + + # Fresh API → include page only if present + return { + "status": cls.FRESH, + "epc_api": record.epc_api, + "epc_page": record.epc_page if record.epc_page else None, + "epc_page_rrn": record.epc_page_rrn, + "epc_api_created_at": record.epc_api_created_at, + "epc_page_created_at": record.epc_page_created_at, + } + + @classmethod + def check_insert_needed(cls, epc_cache, epc_estimated, uprn): + """ + Check if an insert is needed based on existing data. + :return: + """ + no_existing_epc_cache = epc_cache.get("epc_api") is None + existing_cache_expired = ( + epc_cache.get("status") == cls.EXPIRED + ) + + needs_insert = bool((no_existing_epc_cache or existing_cache_expired) and not epc_estimated and uprn) + + return needs_insert + + @staticmethod + def upsert_epc_data( + session: Session, + uprn: int, + epc_api: dict | None, + epc_page: str | None, + epc_page_rrn: str | None, + epc_api_created_at: datetime | None = None, + epc_page_created_at: datetime | None = None, + ): + """ + Insert or update EPC data for a UPRN. 
+ + Rules: + - If record exists → update it + - If record does not exist → create new + """ + + try: + record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first() + + if record: + # update path + if epc_api is not None: + record.epc_api = epc_api + if epc_api_created_at is None: + epc_api_created_at = datetime.now(timezone.utc) + record.epc_api_created_at = epc_api_created_at + + # update page data only if BOTH: + # 1) the caller passed page data + # 2) epc_api is not None (page only allowed when API exists) + if epc_page is not None and epc_api is not None: + record.epc_page = epc_page + record.epc_page_rrn = epc_page_rrn + if epc_page_created_at is None: + epc_page_created_at = datetime.now(timezone.utc) + record.epc_page_created_at = epc_page_created_at + else: + # insert path + record = EpcStore( + uprn=uprn, + epc_api=epc_api, + epc_api_created_at=epc_api_created_at, + epc_page=epc_page if epc_api is not None else None, + epc_page_rrn=epc_page_rrn if epc_api is not None else None, + epc_page_created_at=epc_page_created_at if epc_api is not None else None, + ) + session.add(record) + + session.flush() + session.commit() + + return record + + except SQLAlchemyError as e: + session.rollback() + raise e diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 6e90a297..ee415593 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -5,6 +5,7 @@ from datetime import datetime from tqdm import tqdm import pandas as pd import numpy as np + from etl.epc.Record import EPCRecord from backend.SearchEpc import SearchEpc from sqlalchemy.exc import IntegrityError, OperationalError @@ -24,7 +25,7 @@ from backend.app.db.functions.recommendations_functions import ( ) from backend.app.db.functions.funding_functions import upload_funding from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.functions.address_functions import get_associated_uprns +import backend.app.db.functions 
as db_funcs from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES @@ -527,6 +528,14 @@ async def model_engine(body: PlanTriggerRequest): if uprn: uprn = int(float(uprn)) + epc_api_data, epc_page, rrn, epc_cache = None, None, None, {} + if uprn: + # if we have a UPRN, we check if we already have EPC data associated with this UPRN + epc_cache = db_funcs.epc_functions.EpcStoreService.get_epc_for_uprn(session, uprn) + + if epc_cache["status"] == db_funcs.epc_functions.EpcStoreService.FRESH: + epc_api_data, epc_page, rrn = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] + address1 = config.get("address", None) # Handle domna address list format if pd.isnull(address1) and body.file_format == "domna_asset_list": @@ -540,7 +549,9 @@ async def model_engine(body: PlanTriggerRequest): if (body.event_type == "remote_assessment") and config.get("property_type") == "Flat": # We're running a remote assessment for a flat - we go and grab the associated # UPRNS for other units in the same building - associated_uprns = get_associated_uprns(session, postcode=config["postcode"], uprn=uprn) + associated_uprns = db_funcs.address_functions.get_associated_uprns( + session, postcode=config["postcode"], uprn=uprn + ) epc_searcher = SearchEpc( address1=address1, @@ -555,7 +566,9 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True) + + epc_searcher.find_property(skip_os=True, api_data=epc_api_data) + if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list" and ( epc_searcher.newest_epc["uprn"] < 0 ): @@ -609,18 +622,19 @@ async def 
model_engine(body: PlanTriggerRequest): patch = req_data.patch # if we have a remote assment data type, we pull the additional data and include it + epc_page_source = {} if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): logger.info("Retrieving find my epc data") try: - property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc + property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( + epc_searcher.newest_epc, epc_page, rrn=rrn ) except Exception as e: logger.error(f"Failed to retrieve without cleaning address {e}") for k in ["address", "address1"]: epc_searcher.newest_epc[k] = epc_searcher.address_clean - property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc + property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( + epc_searcher.newest_epc, epc_page, rrn=rrn ) # If we have a property type, this means when we pull the epc data, we might need to make a patch @@ -657,6 +671,24 @@ async def model_engine(body: PlanTriggerRequest): ) ) + # If we have: + # 1) No EPC API data + # 2) A real EPC + # 3) A UPRN (meaning that a UPRN could be fetched against that property) + # We store this data + + if db_funcs.epc_functions.EpcStoreService.check_insert_needed( + epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn + ): + # We store the EPC data we have found for this property + db_funcs.epc_functions.EpcStoreService.upsert_epc_data( + session=session, + uprn=epc_searcher.uprn, + epc_api=epc_searcher.data, + epc_page=epc_page_source.get("page_source"), + epc_page_rrn=epc_page_source.get("rrn"), + ) + if not input_properties: return Response(status_code=204) diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index c57f9ca8..ae9e5ff7 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ 
b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -371,9 +371,12 @@ class RetrieveFindMyEpc: return all_find_my_epc_data - def retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False): + def _find_epc_page(self): """ - For a post code and address, we pull out all the required data from the find my epc website + This function is used to find the EPC page source for a given address and postcode. + It is done by fetching the page, associating to the postcode and then matching the + addresses on the page to the address we have been given. + :return: """ postcode_input = self.postcode.replace(" ", "+") @@ -428,8 +431,22 @@ class RetrieveFindMyEpc: chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] epc_certificate = chosen_epc.split('/')[-1] - address_response = requests.get(chosen_epc, headers=self.HEADERS) - address_res = BeautifulSoup(address_response.text, features="html.parser") + return chosen_epc, epc_certificate + + def retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None): + """ + For a post code and address, we pull out all the required data from the find my epc website + """ + + if epc_page_source is None: + chosen_epc, rrn = self._find_epc_page() + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") + else: + if rrn is None: + raise ValueError("rrn must be provided if epc_page_source is provided") + address_res = BeautifulSoup(epc_page_source, features="html.parser") # Key data we want to retrieve: # 1) Rating @@ -565,7 +582,7 @@ class RetrieveFindMyEpc: epc_data = self.extract_epc_data(address_res) resulting_data = { - 'epc_certificate': epc_certificate, + 'epc_certificate': rrn, 'current_epc_rating': current_rating.split(' ')[-6], 'current_epc_efficiency': current_sap, 'potential_epc_rating': potential_rating.split(' ')[-6], @@ 
-576,11 +593,12 @@ class RetrieveFindMyEpc: "epc_data": epc_data, **assessment_data, **low_carbon_energy_sources, + "page_source": epc_page_source, } if return_page: # We return the page text as well, which can be parsed again later - return resulting_data, postcode_response.text + return resulting_data, epc_page_source return resulting_data @@ -722,11 +740,15 @@ class RetrieveFindMyEpc: return formatted_recommendations @classmethod - def get_from_epc(cls, epc): + def get_from_epc(cls, epc, epc_page_source=None, rrn=None): + + if epc_page_source is not None and rrn is None: + raise ValueError("rrn must be provided if epc_page_source is provided") + # Attempt both methods: try: searcher = cls(address=epc["address"], postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") @@ -734,7 +756,7 @@ class RetrieveFindMyEpc: address1 = ",".join(epc["address"].split(",")[:-1]) try: searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) logger.info("Successfully retrieved find my epc data using trimmed address") except Exception as e2: logger.error(f"Error retrieving find my epc data using trimmed address: {e2}") @@ -747,7 +769,7 @@ class RetrieveFindMyEpc: address1 = epc["address1"] # We attempt with the backup add searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) logger.info("Successfully retrieved find my epc data using backup address") non_invasive_recommendations = { @@ -766,4 +788,9 @@ class RetrieveFindMyEpc: 
**find_epc_data["epc_data"], } - return non_invasive_recommendations, patch + page_source = { + "rrn": find_epc_data["epc_certificate"], + "page_source": find_epc_data["page_source"] + } + + return non_invasive_recommendations, patch, page_source From 3d72d967ec6bc51e494d0ae69e9f2f7ee942393d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 25 Nov 2025 18:28:08 +0000 Subject: [PATCH 074/202] simplifying db funtions --- backend/engine/engine.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index ee415593..07a32be0 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -14,8 +14,6 @@ from starlette.responses import Response from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations from backend.app.db.functions.property_functions import ( create_property_details_epc, create_property_targets, update_property_data, update_or_create_property_spatial_details, ensure_property_exists @@ -23,8 +21,6 @@ from backend.app.db.functions.property_functions import ( from backend.app.db.functions.recommendations_functions import ( create_plan, upload_recommendations, create_scenario ) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn import backend.app.db.functions as db_funcs from backend.app.db.models.portfolio import rating_lookup @@ -34,9 +30,6 @@ from backend.app.plan.utils import ( ) from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions -from backend.app.db.functions.inspections_functions import ( - extract_inspection_data, bulk_upsert_inspections_pg -) from backend.ml_models.api 
import ModelApi from backend.Property import Property @@ -46,8 +39,6 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser import recommendations.optimiser.optimiser_functions as optimiser_functions from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 from backend.ml_models.Valuation import PropertyValuation from etl.bill_savings.KwhData import KwhData @@ -58,6 +49,9 @@ from backend.Funding import Funding from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value +from utils.logger import setup_logger +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 + logger = setup_logger() BATCH_SIZE = 5 @@ -575,7 +569,9 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED # We check for an energy assessment we have performed on this property: - energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn) + energy_assessment = db_funcs.energy_assessment_functions.get_latest_assessment_by_uprn( + session, uprn if uprn is not None else epc_searcher.uprn + ) property_id, is_new = ensure_property_exists( session, body, epc_searcher, energy_assessment, landlord_property_id=config.get("landlord_property_id") @@ -651,7 +647,7 @@ async def model_engine(body: PlanTriggerRequest): eco_packages[property_id] = parse_eco_packages(config, prepared_epc) # Final step - extract inspections data, if we have it - we inject into property for usage - property_inspections = extract_inspection_data(config) + property_inspections = db_funcs.inspections_functions.extract_inspection_data(config) if 
property_inspections: inspections_map[property_id] = property_inspections @@ -696,7 +692,7 @@ async def model_engine(body: PlanTriggerRequest): # aginst each property if if inspections_map: logger.info("Inserting inspections data") - bulk_upsert_inspections_pg(session, inspections_map) + db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) # Set up model api and warm up the lambdas model_api = ModelApi( @@ -713,7 +709,7 @@ async def model_engine(body: PlanTriggerRequest): # consistent requests to the backend for # the same data logger.info("Reading in materials and cleaned datasets") - materials = get_materials(session) + materials = db_funcs.materials_functions.get_materials(session) cleaned = get_cleaned() project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() @@ -1221,8 +1217,7 @@ async def model_engine(body: PlanTriggerRequest): upload_recommendations( session, recommendations_to_upload, p.id, new_plan_id ) - - upload_funding(session, p, new_plan_id, recommendations_to_upload) + db_funcs.funding_functions.upload_funding(session, p, new_plan_id, recommendations_to_upload) if valuations["current_value"] > 0: property_valuation_increases.append( @@ -1261,7 +1256,7 @@ async def model_engine(body: PlanTriggerRequest): property_value_increase_ranges=property_value_increase_ranges ) - aggregate_portfolio_recommendations( + db_funcs.portfolio_functions.aggregate_portfolio_recommendations( session, portfolio_id=body.portfolio_id, scenario_id=scenario_id, From caeab2bf82cff6c9f161882a3ff95ad22a14e755 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 26 Nov 2025 08:42:32 +0000 Subject: [PATCH 075/202] tidy up imports --- backend/engine/engine.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 07a32be0..a7743b90 100644 --- a/backend/engine/engine.py +++ 
b/backend/engine/engine.py @@ -6,21 +6,16 @@ from tqdm import tqdm import pandas as pd import numpy as np -from etl.epc.Record import EPCRecord +from backend.Funding import Funding from backend.SearchEpc import SearchEpc + +from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine -from backend.app.db.functions.property_functions import ( - create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details, ensure_property_exists -) -from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) import backend.app.db.functions as db_funcs from backend.app.db.models.portfolio import rating_lookup @@ -45,7 +40,6 @@ from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc -from backend.Funding import Funding from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value @@ -573,7 +567,7 @@ async def model_engine(body: PlanTriggerRequest): session, uprn if uprn is not None else epc_searcher.uprn ) - property_id, is_new = ensure_property_exists( + property_id, is_new = db_funcs.property_functions.ensure_property_exists( session, body, epc_searcher, energy_assessment, landlord_property_id=config.get("landlord_property_id") ) if not property_id: @@ -589,7 +583,7 @@ async def model_engine(body: PlanTriggerRequest): ) if is_new: - create_property_targets( + db_funcs.property_functions.create_property_targets( session, property_id=property_id, portfolio_id=body.portfolio_id, @@ -1134,7 +1128,7 @@ async def 
model_engine(body: PlanTriggerRequest): # We don't need to create a new scenario, we just use the existing one scenario_id = body.scenario_id else: - engine_scenario = create_scenario( + engine_scenario = db_funcs.recommendations_functions.create_scenario( session=session, scenario={ "name": body.scenario_name, @@ -1183,20 +1177,20 @@ async def model_engine(body: PlanTriggerRequest): property_details_epc = p.get_property_details_epc( portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, ) - create_property_details_epc(session, property_details_epc) + db_funcs.property_functions.create_property_details_epc(session, property_details_epc) - update_or_create_property_spatial_details(session, p.uprn, p.spatial) + db_funcs.property_functions.update_or_create_property_spatial_details(session, p.uprn, + p.spatial) property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - update_property_data( + db_funcs.property_functions.update_property_data( session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data ) if not recommendations_to_upload: continue - - new_plan_id = create_plan(session, { + new_plan_id = db_funcs.recommendations_functions.create_plan(session, { "portfolio_id": body.portfolio_id, "property_id": p.id, "scenario_id": scenario_id, @@ -1214,7 +1208,7 @@ async def model_engine(body: PlanTriggerRequest): "plan_type": eco_packages.get(p.id, (None, None, None))[2] }) - upload_recommendations( + db_funcs.recommendations_functions.upload_recommendations( session, recommendations_to_upload, p.id, new_plan_id ) db_funcs.funding_functions.upload_funding(session, p, new_plan_id, recommendations_to_upload) From 5c8c9251c486a158b582e0a2970191460cc7a018 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 26 Nov 2025 16:40:16 +0000 Subject: [PATCH 076/202] refactoring tasks to get working with sqlalchemy 2 --- asset_list/app.py | 1264 ----------------- backend/app/db/functions/tasks/Tasks.py | 105 +- 
backend/app/db/models/tasks.py | 65 +- backend/app/plan/router.py | 26 +- backend/app/plan/schemas.py | 4 + .../Nov 2025 Consulting Project/data_prep.py | 186 ++- etl/find_my_epc/RetrieveFindMyEpc.py | 83 +- 7 files changed, 310 insertions(+), 1423 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index ec47b07d..cbb2cd93 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,1270 +59,6 @@ def app(): Property UPRN """ - # PFP - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/Nov 2025 Inspections" - data_filename = "Inspections List - PFP (1).xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1", "Address 2", "Address 3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype" # Using inspections - landlord_built_form = "Archetype 2" # Using inspections - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Stonewater Solar - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" - data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" - sheet_name = "Proposed Sheet" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address" - address_cols_to_concat = [] - 
missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls" - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Asset Id" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" - data_filename = "22.10_Cambridge_west addresses.xlsx" - sheet_name = "Asset List" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Full Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Property Box - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box" - data_filename = "Property Box Finance Portfolio.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - address1_column = None - 
address1_method = "house_number_extraction" - fulladdress_column = "Address 1" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = "block_id" - - # CDS - able-to-pay - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay" - data_filename = "CDS_ASSET LIST_(2314).xlsx" - sheet_name = "Sheet1" - postcode_column = 'Property Address - Postcode' - address1_column = "Property Address - Line 1" - address1_method = None - fulladdress_column = "Property Address - Line 1" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Hyde - solar - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" - data_filename = 
"Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" - sheet_name = "Electric Property Inspections" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls " - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Address ID" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Hyde cavity - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" - data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" - sheet_name = "Cavity Inspections" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls " - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Address ID" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - 
outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # CDS - Sept 2025 - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" - data_filename = "Founder Estates CDS.xlsx" - sheet_name = "Combined List" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "(Do Not Modify) Property" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Project from Nick - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio" - data_filename = "22.10 AL Portfolio.xlsx" - sheet_name = "22.10 AL Portfolio" - postcode_column = 'Postcode' - address1_column = None - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv 
= None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Lambeth - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" - data_filename = "LAMBETH Asset List ( Incomplete).xlsx" - sheet_name = "Green properties" - postcode_column = 'SX3 Postcode' - address1_column = "SX3 Short Address" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["SX3 Short Address"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # # Colchester - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections" - # data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx" - # sheet_name = "Extra 202 Colchester Addresses" - # postcode_column = 'domna_postcode' - # address1_column = "domna_address_1" - # address1_method = None - # fulladdress_column = "domna_full_address" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built 
= None - # landlord_os_uprn = None - # landlord_property_type = "landlord_property_type" - # landlord_built_form = "landlord_built_form" - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "landlord_property_id" - # landlord_sap = None - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "landlord_block_reference" - - # # Abri - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections" - # data_filename = "Desktop ABRI data - Standardised After Programmes (2).xlsx" - # sheet_name = "Reviewed List" - # postcode_column = 'domna_postcode' - # address1_column = "domna_address_1" - # address1_method = None - # fulladdress_column = "domna_full_address" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "landlord_year_built" - # landlord_os_uprn = None - # landlord_property_type = "PropertyType_original_from_landlord" - # landlord_built_form = "BuildForm_original_from_landlord" - # landlord_wall_construction = "Wall Construction_original_from_landlord" - # landlord_roof_construction = None - # landlord_heating_system = "HeatingType_original_from_landlord" - # landlord_existing_pv = None - # landlord_property_id = "landlord_property_id" - # landlord_sap = None - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # 
asset_list_header = 0 - # landlord_block_reference = None - - # Freebridge - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme" - data_filename = "Domna - FCH property data May 25 copy.xlsx" - sheet_name = "EPC Data" - postcode_column = 'Post Code' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1", "Address 4"] - missing_postcodes_method = None - landlord_year_built = "Build Date" - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = "Walls Description" - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "Place Ref" - landlord_roof_construction = "Roof Description" - landlord_sap = "Current SAP" - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_address = [] - outcomes_id = [] - master_filepaths = [] - master_to_asset_list_filepath = None - asset_list_header = 0 - landlord_block_reference = None - master_id_colnames = [] - phase = False # Inspections not complete, produce a partial view - ecosurv_landlords = None - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands" - # data_filename = "Broadlands Asset List.xlsx" - # sheet_name = "Assets" - # postcode_column = 'POSTCODE' - # fulladdress_column = None - # address1_column = "Address1" - # address1_method = None - # address_cols_to_concat = ["Address1"] - # missing_postcodes_method = None - # landlord_year_built = "DATEBUILT" - # landlord_os_uprn = None - # landlord_property_type = "PropertyType" - # landlord_built_form = "PropertyType" - # landlord_wall_construction = None - # landlord_heating_system = "Heating Fuel" - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [os.path.join(data_folder, "outcomes.xlsx")] - # outcomes_sheetname = 
["Sheet1"] - # outcomes_postcode = ["Postcode"] - # outcomes_houseno = ["No."] - # outcomes_address = ["Address"] - # outcomes_id = [None] - # master_filepaths = [ - # os.path.join(data_folder, "eco3 submissions.csv"), - # os.path.join(data_folder, "eco4 submissions.csv"), - # ] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "broadland" - # # - # - # # Community: - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/New Programme" - # data_filename = "SUB EPC C to DOMNA - 24.07.25.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'POSTCODE' - # fulladdress_column = "ADDRESS" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "BUILD DATE" - # landlord_os_uprn = None - # landlord_property_type = "PROPERTY TYPE" - # landlord_built_form = "Archetype" # Using the inspections archetype - # landlord_wall_construction = "CONSTRUCTION TYPE" - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [] - # - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme Analysis" - # data_filename = "EalingProjectRebuildJW210725.xlsx" - # sheet_name = "Refine & Houses" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # 
address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Property ref" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "Block Reference" - # master_id_colnames = [] - # - # # TODO: Delete me - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/" - # data_filename = "20250716 Asset List.xlsx" - # sheet_name = "Sheet 1" - # postcode_column = 'Postcode' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = None - # - # # Southend - # data_folder 
= "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme" - # data_filename = "SOUTHEND - RYAN.xlsx" - # sheet_name = "July 2025 Surveys" - # postcode_column = 'Postcode' - # fulladdress_column = "Full postal address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Property age" - # landlord_os_uprn = None - # landlord_property_type = "Property type" - # landlord_built_form = "Property type" - # landlord_wall_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = None - # - # # For Rooftop - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Rooftop" - # data_filename = "Rooftop Asset List - July 2025.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'post_code' - # fulladdress_column = None - # address1_column = "add_1" - # address1_method = None - # address_cols_to_concat = [ - # "add_1", "add_2", "add_3", "add_4" - # ] - # missing_postcodes_method = None - # landlord_year_built = "date_built" - # landlord_os_uprn = None - # landlord_property_type = "ConstructionStyle" - # landlord_built_form = "ConstructionStyle" - # landlord_wall_construction = None - # landlord_heating_system = "Description" - # landlord_existing_pv = None - # landlord_property_id = "PropertyCode" - # outcomes_filename = [os.path.join(data_folder, "Rooftop_Outcomes.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - 
# outcomes_houseno = ["NO"] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Master.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = "bl_rec_ref" - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "rooftop" - # - # # For Housing - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing/New Programme July 2025" - # data_filename = "FOR HOUSING Asset List (Combined).xlsx" - # sheet_name = "Asset List" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "Type" - # landlord_built_form = "Type" - # landlord_wall_construction = None - # landlord_heating_system = "Heating - full" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Khalim Combined - for analysis.xlsx")] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = ["NO"] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = "SAP" - # ecosurv_landlords = "for housing" - # - # # CDS - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS" - # data_filename = "Founder Estates - Asset List.xlsx" - # sheet_name = "Combined" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = 
"house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating Type" - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [os.path.join(data_folder, "submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "cds" - # - # # Plus Dane - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/" - # data_filename = "20250711 Plus Dane Asset List.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Property Age" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_built_form = "Built Form" - # landlord_wall_construction = "Wall Construction" - # landlord_heating_system = "Full Heating System" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [ - # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"), - # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"), - # os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"), - # ] - # outcomes_sheetname = [ - # "CWI & LI - 2024", "2025 - CWI", "PV - 2025", - # ] - # outcomes_postcode = ["Postcode", "Postcode", "Postcode"] - # outcomes_houseno = ["No.", 
"No", "No"] - # outcomes_address = ["Address", "Address", "Address"] - # outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"] - # master_filepaths = [ - # os.path.join(data_folder, "submissions/JJC-Table 1.csv"), - # os.path.join(data_folder, "submissions/SCIS-Table 1.csv") - # ] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = "SAP Rating" - # ecosurv_landlords = "plus dane" - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" - # data_filename = "20250710 Asset List Brentwood.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "House Number" - # address1_method = None - # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] - # missing_postcodes_method = None - # landlord_year_built = "Year Built" - # landlord_os_uprn = None - # landlord_property_type = "Dwelling" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = [None] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "brentwood" - - # Brentwood - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" - # 
data_filename = "20250710 Asset List Brentwood.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "House Number" - # address1_method = None - # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] - # missing_postcodes_method = None - # landlord_year_built = "Year Built" - # landlord_os_uprn = None - # landlord_property_type = "Dwelling" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = [None] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "brentwood" - # - # # Eastlight - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme" - # data_filename = "INSPECTIONS MASTER Non Tech.xlsx" - # sheet_name = "EASTLIGHT CW" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "HouseName" - # address1_method = None - # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"] - # missing_postcodes_method = None - # landlord_year_built = "Built In Year" - # landlord_os_uprn = None - # landlord_property_type = "AssetType" - # landlord_built_form = "Archetype" # Using inspections archetype - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = "Main Heating Source" - # landlord_existing_pv = None - 
# landlord_property_id = "UPRN" - # landlord_sap = "SAP Score" - # outcomes_filename = [ - # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"), - # os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"), - # ] - # outcomes_sheetname = ["Outcomes", "Feedback"] - # outcomes_postcode = ["Postcode", "Postcode"] - # outcomes_houseno = ["No", "No."] - # outcomes_id = [None, None] - # outcomes_address = ["Address", "Address"] - # master_filepaths = [ - # os.path.join(data_folder, "ECO 3-Table 1.csv"), - # os.path.join(data_folder, "ECO 4-Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "eastlight" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_sap = None - - # Pickering and Ferens - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" - # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" - # sheet_name = "Sava Intelligent Energy - Prope" - # postcode_column = 'Postcode' - # fulladdress_column = 'Address' - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "Property Type" # Using the inspections property type - # landlord_built_form = "Archetype 2" - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = "SAP Rating (RdSAP 10)" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [ - # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), - # os.path.join(data_folder, "PICKERING & FERENS NEW 
MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "pickering" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - - # Colchester - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" - # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Full Address.1' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "first_word" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_wall_construction = "Wallinsul" - # landlord_heating_system = "HeatSorc" - # landlord_existing_pv = None - # landlord_property_id = "Property Reference" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_built_form = None - # landlord_roof_construction = None - # landlord_sap = None - # landlord_block_reference = None - # phase = False - # ecosurv_landlords = None - # master_id_colnames = [] - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" - # data_filename = "EalingFlats.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # 
landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Property ref" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "Block Ref" - # master_id_colnames = [] - - # Southern - Jan list - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" - # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" - # sheet_name = "Jan 2025 additions" - # postcode_column = 'Post Code' - # fulladdress_column = None - # address1_column = "NO." - # address1_method = None - # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "SH Property Reference" - # landlord_sap = None - # outcomes_filename = [ - # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), - # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), - # ] - # outcomes_sheetname = ["Feedback", "Collated"] - # outcomes_postcode = ["Poscode", "Postcode"] - # outcomes_houseno = ["No.", "No"] - # outcomes_id = ["UPRNs", None] - # outcomes_address = ["Address", "Address"] - # master_filepaths = [ - # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), - # 
os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "southern" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None, None, None] - - # NCHA - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" - # data_filename = "Energy Information MASTER June 2025.xlsx" - # sheet_name = "Data" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date (HAR10)" - # landlord_os_uprn = None - # landlord_property_type = "Property Type (HAR10)" - # landlord_built_form = "Build Form (EPC)" - # landlord_wall_construction = "Wall Description" - # landlord_roof_construction = None - # landlord_heating_system = "HEAT Code" - # landlord_existing_pv = None - # landlord_property_id = "Place ref" - # landlord_sap = "EPC SAP" - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" - # data_filename = "07.04 CALICO - Final List.xlsx" - # asset_list_header = 2 - # sheet_name = "Final List" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "Property Number / Name" - # address1_method = None - # address_cols_to_concat = [ - # "Property Number / Name", - # "Street", - # "Town" - # ] - # missing_postcodes_method = None - # 
landlord_year_built = "NROSH Estimated Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Asset Type" - # landlord_built_form = None - # landlord_wall_construction = "Wall Type" - # landlord_heating_system = "Boiler Type" - # landlord_existing_pv = None - # landlord_property_id = "Asset Reference" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = "Current Efficiency Rating - Score" - # phase = None - # ecosurv_landlords = None - - # data_folder = ( - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset - # List" - # ) - # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" - # sheet_name = "Assets" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Year" - # landlord_os_uprn = None - # landlord_property_type = "Property Archetype" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating Fuel Type" - # landlord_existing_pv = None - # landlord_property_id = "Uprn - DO NOT DELETE" - # outcomes_filename = [ - # os.path.join(data_folder, "RT - LiveWest.xlsx") - # ] - # outcomes_sheetname = ["Feedback"] - # outcomes_postcode = ["Poscode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["UPRN"] - # outcomes_address = ["Address"] - # master_filepaths = [ - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling - # Master " - # "- redacted for analysis/CAVITY-Table 1.csv" - # ] - # master_id_colnames = [None] - # 
master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = None - # phase = None - # ecosurv_landlords = "livewest|live west" - - # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " - # "2025/Livewest Asset List (Original) - csv") - # data_filename = "Report-Table 1.csv" - # sheet_name = None - # postcode_column = 'Postcode' - # fulladdress_column = "T1_Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Yr" - # landlord_os_uprn = None - # landlord_property_type = "T1_AssetType" - # landlord_built_form = "T1_AssetType" - # landlord_wall_construction = "Wall Type Cavity" - # landlord_heating_system = "Heating Fuel" - # landlord_existing_pv = None - # landlord_property_id = "T1_UPRN" - # outcomes_filename = [ - # os.path.join(data_folder, "RT - LiveWest.xlsx") - # ] - # outcomes_address = ["Address"] - # outcomes_sheetname = ["Feedback"] - # outcomes_postcode = ["Poscode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["UPRN"] - # master_filepaths = [ - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling - # Master " - # "- redacted for analysis/CAVITY-Table 1.csv" - # ] - # master_id_colnames = [None] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = None - # phase = None - # ecosurv_landlords = "livewest|live west" - - # Stori - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" - # data_filename = "Asset list - for analysis.xlsx" - # sheet_name = "SAP and Costs Calculations" - # postcode_column = 'Postcode' - # fulladdress_column = "Address1" - # address1_column = None - # address1_method = "house_number_extraction" - # 
address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Age" - # landlord_os_uprn = None - # landlord_property_type = "TYPE" - # landlord_built_form = "AGE / DETACHMENT" - # landlord_wall_construction = "WALL" - # landlord_roof_construction = "LOFT INSULATION" - # landlord_heating_system = "BOILER" - # landlord_existing_pv = "SOLAR PV" - # landlord_property_id = "UPRN" - # landlord_sap = "Current SAP Rating" - # landlord_block_reference = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # master_id_colnames = [] - # phase = False - # ecosurv_landlords = None - - # Thrive - reconciliation - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" - # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'postcode' - # fulladdress_column = "full_address" - # address1_column = "address_line_1" - # address1_method = None - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "age_band_calculated" - # landlord_os_uprn = None - # landlord_property_type = "property_type" - # landlord_built_form = "build_form" - # landlord_wall_construction = None - # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" - # landlord_heating_system = "heating_type_updated" - # landlord_existing_pv = None - # landlord_property_id = "thrive_property_id" - # landlord_sap = "sap_rating_updated" - # landlord_block_reference = "block_reference" - # outcomes_filename = [ - # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") - # ] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["postcode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["thrive_property_id"] - # 
outcomes_address = ["address"] - # master_filepaths = [ - # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), - # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), - # ] - # master_to_asset_list_filepath = None - # master_id_colnames = ["thrive_property_id", "thrive_property_id"] - # phase = False - # ecosurv_landlords = "thrive" - - # Southern Midlands - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" - # data_filename = "Southern Housing Midlands Property List - combined.xlsx" - # sheet_name = "Sheet 1" - # postcode_column = 'Post Code' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Age_1" - # landlord_os_uprn = None - # landlord_property_type = "Prop_Type" - # landlord_built_form = "Prop_Type" - # landlord_wall_construction = "Walls_P" - # landlord_heating_system = "Heating System" - # landlord_existing_pv = None - # landlord_property_id = "AssetID" - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_to_asset_list_filepath = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 06e1c6fe..7508ab2e 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -1,5 +1,3 @@ -from __future__ import annotations - # ---- Standard Library ---- from typing import Optional, Dict, Any from datetime import datetime, timezone @@ -28,7 +26,6 @@ class SubTaskInterface: # CREATE SUBTASK # -------------------------------------------------------- def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, 
Any]] = None): - now = datetime.now(timezone.utc) with get_db_session() as session: task = session.get(Task, task_id) @@ -36,11 +33,11 @@ class SubTaskInterface: raise ValueError(f"Task {task_id} not found") subtask = SubTask( - taskId=task_id, + task_id=task_id, inputs=json.dumps(inputs) if inputs else None, status="waiting", - jobStarted=None, - jobCompleted=None, + job_started=None, + job_completed=None, ) session.add(subtask) @@ -49,7 +46,7 @@ class SubTaskInterface: # Recalculate parent task progress self._update_task_progress(session, task_id) - return subtask + return subtask.id # -------------------------------------------------------- # UPDATE STATUS (in progress, complete, failed) @@ -65,21 +62,21 @@ class SubTaskInterface: normalized = status.lower() # When job really starts - if normalized == "in progress" and subtask.jobStarted is None: - subtask.jobStarted = now + if normalized == "in progress" and subtask.job_started is None: + subtask.job_started = now # Completed or failed if normalized in ("complete", "failed"): - subtask.jobCompleted = now + subtask.job_completed = now subtask.status = normalized - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() # Recalculate task status - self._update_task_progress(session, subtask.taskId) + self._update_task_progress(session, subtask.task_id) session.refresh(subtask) return subtask @@ -87,7 +84,8 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE OUTPUTS # -------------------------------------------------------- - def update_subtask_output(self, subtask_id: UUID, outputs: Dict[str, Any]): + @staticmethod + def update_subtask_output(subtask_id: UUID, outputs: Dict[str, Any]): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -96,7 +94,7 @@ class SubTaskInterface: raise ValueError(f"SubTask {subtask_id} not found") subtask.outputs = json.dumps(outputs) - subtask.updatedAt = now + subtask.updated_at = now 
session.add(subtask) session.commit() @@ -106,7 +104,8 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE CLOUD LOGS URL # -------------------------------------------------------- - def update_subtask_logs(self, subtask_id: UUID, cloud_logs_url: str): + @staticmethod + def update_subtask_logs(subtask_id: UUID, cloud_logs_url: str): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -114,8 +113,8 @@ class SubTaskInterface: if not subtask: raise ValueError(f"SubTask {subtask_id} not found") - subtask.cloudLogsURL = cloud_logs_url - subtask.updatedAt = now + subtask.cloud_logs_url = cloud_logs_url + subtask.updated_at = now session.add(subtask) session.commit() @@ -125,8 +124,8 @@ class SubTaskInterface: # -------------------------------------------------------- # SET BOTH OUTPUT + LOGS # -------------------------------------------------------- + @staticmethod def set_subtask_result( - self, subtask_id: UUID, outputs: Optional[Dict[str, Any]] = None, cloud_logs_url: Optional[str] = None, @@ -142,9 +141,9 @@ class SubTaskInterface: subtask.outputs = json.dumps(outputs) if cloud_logs_url is not None: - subtask.cloudLogsURL = cloud_logs_url + subtask.cloud_logs_url = cloud_logs_url - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() session.refresh(subtask) @@ -153,13 +152,14 @@ class SubTaskInterface: # -------------------------------------------------------- # TASK PROGRESS CALCULATION # -------------------------------------------------------- - def _update_task_progress(self, session: Session, task_id: UUID): + @staticmethod + def _update_task_progress(session: Session, task_id: UUID): task = session.get(Task, task_id) if not task: return subtasks = session.exec( - select(SubTask).where(SubTask.taskId == task_id) + select(SubTask).where(SubTask.task_id == task_id) ).all() statuses = [s.status.lower() for s in subtasks] @@ -167,24 +167,24 @@ class SubTaskInterface: if 
"failed" in statuses: task.status = "failed" - task.jobCompleted = now + task.job_completed = now elif all(s == "complete" for s in statuses): task.status = "complete" - task.jobCompleted = now + task.job_completed = now elif "in progress" in statuses: task.status = "in progress" - if task.jobStarted is None: - task.jobStarted = now + if task.job_started is None: + task.job_started = now else: # All waiting task.status = "waiting" - task.jobStarted = None - task.jobCompleted = None + task.job_started = None + task.job_completed = None - task.updatedAt = now + task.updated_at = now session.add(task) session.commit() @@ -212,18 +212,18 @@ class SubTaskInterface: # Set logs if cloud_logs_url is not None: - subtask.cloudLogsURL = cloud_logs_url + subtask.cloud_logs_url = cloud_logs_url # Status + timestamps subtask.status = normalized - subtask.jobCompleted = now - subtask.updatedAt = now + subtask.job_completed = now + subtask.updated_at = now session.add(subtask) session.commit() # Update parent task (complete/failed) - self._update_task_progress(session, subtask.taskId) + self._update_task_progress(session, subtask.task_id) session.refresh(subtask) return subtask @@ -237,38 +237,49 @@ class TasksInterface: High-level operations for Task records. """ + @staticmethod def create_task( - self, - *, task_source: str, service: Optional[str] = None, inputs: Optional[Dict[str, Any]] = None, + task_only: bool = False, ): - now = datetime.now(timezone.utc) - + """ + Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just + a task, without a subtask + :param task_source: Text indicating source of task creation (e.g. 
file path + function name) + :param service: Optional service name + :param inputs: Inputs of the job being run + :param task_only: If True, only create the Task record, without a SubTask + :return: + """ with get_db_session() as session: task = Task( - taskSource=task_source, + task_source=task_source, service=service, status="waiting", - jobStarted=None, - jobCompleted=None, + job_started=None, + job_completed=None, ) session.add(task) session.commit() session.refresh(task) + if task_only: + return task.id, None + # Create first subtask in waiting state subtask_interface = SubTaskInterface() - subtask = subtask_interface.create_subtask( + subtask_id = subtask_interface.create_subtask( task_id=task.id, inputs=inputs, ) - return task.id, subtask.id + return task.id, subtask_id - def update_task_status(self, task_id: UUID, status: str): + @staticmethod + def update_task_status(task_id: UUID, status: str): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -278,14 +289,14 @@ class TasksInterface: normalized = status.lower() - if normalized == "in progress" and task.jobStarted is None: - task.jobStarted = now + if normalized == "in progress" and task.job_started is None: + task.job_started = now if normalized == "complete": - task.jobCompleted = now + task.job_completed = now task.status = normalized - task.updatedAt = now + task.updated_at = now session.add(task) session.commit() diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py index d8007dcd..cfe18d83 100644 --- a/backend/app/db/models/tasks.py +++ b/backend/app/db/models/tasks.py @@ -1,6 +1,4 @@ -from __future__ import annotations - -from typing import Optional, List +from typing import Optional from datetime import datetime from uuid import UUID, uuid4 @@ -10,64 +8,29 @@ from sqlmodel import SQLModel, Field, Relationship class Task(SQLModel, table=True): __tablename__ = "tasks" - id: UUID = Field( - default_factory=uuid4, - primary_key=True, - index=True, - ) - - 
taskSource: str = Field(alias="task_source") - - jobStarted: Optional[datetime] = Field( - default=None, alias="job_started" - ) - jobCompleted: Optional[datetime] = Field( - default=None, alias="job_completed" - ) - + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_source: str + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None status: str = Field(default="In Progress") service: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) - updatedAt: datetime = Field( - default_factory=datetime.utcnow, - alias="updated_at", - ) - - # Relationship - subTasks: List["SubTask"] = Relationship(back_populates="task") + sub_tasks: list["SubTask"] = Relationship(back_populates="task") class SubTask(SQLModel, table=True): __tablename__ = "sub_task" - id: UUID = Field( - default_factory=uuid4, - primary_key=True, - index=True, - ) - - taskId: UUID = Field( - foreign_key="tasks.id", - alias="task_id", - ) - - jobStarted: Optional[datetime] = Field( - default=None, alias="job_started" - ) - jobCompleted: Optional[datetime] = Field( - default=None, alias="job_completed" - ) + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_id: UUID = Field(foreign_key="tasks.id") + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None status: str = Field(default="In Progress") - inputs: Optional[str] = None outputs: Optional[str] = None - cloudLogsURL: Optional[str] = Field(alias="cloud_logs_url") + cloud_logs_url: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) - updatedAt: datetime = Field( - default_factory=datetime.utcnow, - alias="updated_at", - ) - - # Relationship - task: Optional[Task] = Relationship(back_populates="subTasks") + task: Optional["Task"] = Relationship(back_populates="sub_tasks") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index af57e35a..2b2306ee 100644 --- 
a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -81,14 +81,38 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id + # Create a task, and associated sub-tasks + from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface + # Create a main task + task_id = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_plan_entrypoint", + service="plan_engine", + inputs=data, + task_only=True + ) + + subtask_interface = SubTaskInterface() + for i in range(total_chunks): # Create an entry in the request logs table index_start = i * chunk_size index_end = min((i + 1) * chunk_size, total_rows) - message_payload = {**data, "index_start": index_start, "index_end": index_end} + message_payload = { + **data, "index_start": index_start, "index_end": index_end, + } message_body = json.dumps(message_payload) + # Create a subtask for this chunk + subtask_id = subtask_interface.create_subtask( + task_id=task_id, + inputs=message_payload + ) + + # Add task and subtask to message + message_payload["task_id"] = str(task_id) + message_payload["subtask_id"] = str(subtask_id) + response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 858a0a35..6f6db328 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -129,6 +129,10 @@ class PlanTriggerRequest(BaseModel): index_start: Optional[int] = None index_end: Optional[int] = None + # Task and subtask IDs + task_id: Optional[str] = None + subtask_id: Optional[str] = None + @model_validator(mode="after") def check_indexes(self): if (self.index_start is None) != (self.index_end is None): diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index 6dd71b98..77736aff 100644 --- 
a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -10,6 +10,7 @@ import json import time import os import pandas as pd +import numpy as np from tqdm import tqdm from dotenv import load_dotenv from asset_list.utils import get_data_for_property @@ -52,8 +53,6 @@ n_postcodes = property_list["Post Code"].nunique() postcode_summary = property_list.groupby("Post Code")["UPRN"].count().reset_index() postcode_summary["UPRN"].mean() -test_match = property_list.merge(sustainability_data, left_on="UPRN", right_on="Org Ref") - def classify_floor_area(x): if x <= 72: @@ -70,20 +69,187 @@ sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area ( lambda x: classify_floor_area(x) ) -archetypes = sustainability_data[ - ["Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", - "Roof Construction", "Roof Insulation", "Floor Construction", "Floor Insulation", - "Glazing", "Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy", - "Floor Area Band"] -].drop_duplicates() +# Archetype reductions -# Potential reductions: +# Roof insulation category # 1) Split roof insulation into > 100mm loft and <= 100mm loft +sustainability_data["Roof Insulation Category"] = sustainability_data["Roof Insulation"].copy() +sustainability_data["Roof Insulation Category"] = np.where( + sustainability_data["Roof Insulation Category"].isin( + ['mm200', 'mm300', 'mm250', 'mm150', 'mm270', 'mm400', 'mm350'], + ), + "LI > 100mm", + sustainability_data["Roof Insulation Category"], +) + +sustainability_data["Roof Insulation Category"] = np.where( + sustainability_data["Roof Insulation Category"].isin( + ['mm100', 'mm50', 'mm75', 'mm25'], + ), + "LI <= 100mm", + sustainability_data["Roof Insulation Category"], +) + # 2) Group all of the glazed together (e.g. double glazed, secondary glazed, triple glazed) -# 3) Group up boiler efficiency A-C, D - F, G? 
or someting like this +sustainability_data["Glazing Type"] = sustainability_data["Glazing"].copy() +sustainability_data["Glazing Type"] = np.where( + sustainability_data["Glazing Type"].isin( + ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData'] + ), + "Double Glazed", + sustainability_data["Glazing Type"], +) +sustainability_data["Glazing Type"] = np.where( + sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']), + "Triple Glazed", + sustainability_data["Glazing Type"], +) + +# 3) Group up boiler efficiency A, B-D, E - G? or someting like this +sustainability_data["Boiler Efficiency Group"] = sustainability_data["Boiler Efficiency"].copy() +sustainability_data["Boiler Efficiency Group"] = np.where( + sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']), + "B-D", + sustainability_data["Boiler Efficiency Group"], +) +sustainability_data["Boiler Efficiency Group"] = np.where( + sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']), + "E-G", + sustainability_data["Boiler Efficiency Group"], +) + # 4) Group up main fuel into gas, electric, oil, other? 
+sustainability_data["Main Fuel Group"] = sustainability_data["Main Fuel"].copy() +sustainability_data["Main Fuel Group"] = np.where( + sustainability_data["Main Fuel Group"].isin( + ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"] + ), + "Other Fuel", + sustainability_data["Main Fuel Group"], +) + # 5) Wall Construction - group up Sandstone and Granite into one category +sustainability_data["Wall Construction"] = np.where( + sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]), + "Sandstone/Granite", + sustainability_data["Wall Construction"] +) + +sustainability_data["Wall Construction"] = np.where( + sustainability_data["Wall Construction"].isin(["Timber Frame", "System", "Solid Brick"]), + "Solid", + sustainability_data["Wall Construction"] +) + # 6) Reduce or remove floor construction +sustainability_data["Floor Construction"] = np.where( + sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]), + "Suspended Floor", + sustainability_data["Floor Construction"] +) + +# 7) Reduce wall insulation +sustainability_data["Wall Insulation"] = np.where( + sustainability_data["Wall Insulation"].isin( + ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"] + ), + "Insulated", + sustainability_data["Wall Insulation"] +) + +# 8) Fill floor insulation +sustainability_data["Floor Insulation"] = sustainability_data["Floor Insulation"].fillna("Unknown") + +# 9) Reduce Age bands +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]), + "2003 onwards", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]), + "Before 1929", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + 
sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]), + "1983-1995", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]), + "1950-1982", + sustainability_data["Construction Years"], +) + +# Roof +sustainability_data["Roof Construction"] = np.where( + sustainability_data["Roof Construction"].isin( + ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"] + ), + "Pitched Roof", + sustainability_data["Roof Construction"] +) + +archetype_variables = [ + "Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", + "Roof Construction", "Roof Insulation Category", "Floor Construction", "Floor Insulation", + "Glazing Type", "Heating", "Boiler Efficiency Group", "Main Fuel Group", "Controls Adequacy", + "Floor Area Band" +] + +archetypes = sustainability_data[archetype_variables + ["UPRN"]].dropna().groupby(archetype_variables)[ + "UPRN"].nunique().reset_index().rename(columns={"UPRN": "Count"}).sort_values(by="Count", + ascending=False).reset_index( + drop=True) + +# We take a sample that represents 95% of the properties +archetypes["Cumulative Count"] = archetypes["Count"].cumsum() +archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum() + +archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80] +archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1 +archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str) + +# We now take a sample of the properties that represent 85% of the total properties +sustainability_data = sustainability_data.merge( + archetypes_85, + on=archetype_variables, + how="inner" +) +# We take 1 random property, by archetype 85 reference +modelling_sample = 
sustainability_data.groupby("Archetypes_85_reference").apply( + lambda x: x.sample(1, random_state=42) +).reset_index(drop=True) + + +# Checking distributions +def compare_distributions(full_df, sample_df, column): + full_dist = full_df[column].value_counts(normalize=True) + sample_dist = sample_df[column].value_counts(normalize=True) + comparison = pd.concat([full_dist, sample_dist], axis=1, keys=['Full', 'Sample']).fillna(0) + return comparison + + +for col in archetype_variables: + print(f"--- {col} ---") + print(compare_distributions(sustainability_data, modelling_sample, col)) + +# Save this CSV as input +modelling_sample.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx", +) +# Save the archetype definitions +archetypes_85.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/archetypes_85.xlsx", +) +# Save the full archetypes +archetypes.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/full_archetypes.xlsx", +) # Maps the property types to the format recognised by the EPC api property_type_map = {} diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index ae9e5ff7..519c3e52 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -21,14 +21,16 @@ class RetrieveFindMyEpc: 'Chrome/111.0.0.0 Safari/537.36' } - def __init__(self, address: str, postcode: str): + def __init__(self, address: str, postcode: str, rrn: str = None): """ This class is tasked with retrieving the latest EPC data from the find my epc website :param address: The address of the property :param postcode: The postcode of the property + :param rrn: The RRN of the EPC (if known) """ self.address = address self.postcode = postcode + self.rrn = rrn self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower() self.walls = [] @@ 
-286,54 +288,12 @@ class RetrieveFindMyEpc: :return: """ - postcode_input = self.postcode.replace(" ", "+") - postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) - postcode_response = requests.get(postcode_search, headers=self.HEADERS) - - postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") - rows = postcode_res.find_all('tr', class_='govuk-table__row') - - extracted_table = [] - for row in rows: - # Extract the address and URL - address_tag = row.find('a', class_='govuk-link') - if address_tag is None: - continue - extracted_address = None - extracted_address_url = None - if address_tag: - extracted_address = address_tag.text.strip() - extracted_address_url = address_tag['href'] - - extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower() - if not extracted_address_cleaned.startswith(self.address_cleaned): - continue - - # If the address is a match, we can extract the data - - # Extract the expiry date - expiry_date_tag = row.find('td', class_='govuk-table__cell date') - expiry_date = None - if expiry_date_tag is not None: - expiry_date = expiry_date_tag.parent.find('span').text.strip() - - extracted_table.append( - { - "extracted_address": extracted_address, - "extracted_address_url": extracted_address_url, - "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'), - } - ) - - if not extracted_table: - raise ValueError("No EPC found") - - if len(extracted_table) > 1: - # We take the one with the most recent expiry date - extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) - - chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] - epc_certificate = chosen_epc.split('/')[-1] + if self.rrn: + # We build the URL directly + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + else: + chosen_epc, epc_certificate = self._find_epc_page() address_response = 
requests.get(chosen_epc, headers=self.HEADERS) address_res = BeautifulSoup(address_response.text, features="html.parser") @@ -438,11 +398,17 @@ class RetrieveFindMyEpc: For a post code and address, we pull out all the required data from the find my epc website """ - if epc_page_source is None: + if epc_page_source is None and rrn is None: chosen_epc, rrn = self._find_epc_page() address_response = requests.get(chosen_epc, headers=self.HEADERS) epc_page_source = address_response.text address_res = BeautifulSoup(address_response.text, features="html.parser") + elif self.rrn: + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") else: if rrn is None: raise ValueError("rrn must be provided if epc_page_source is provided") @@ -581,6 +547,19 @@ class RetrieveFindMyEpc: # 5) Pull out the EPC data epc_data = self.extract_epc_data(address_res) + # Pull out the address information which can be found in the box with the class "epc-address" + # We split it up on break tags + addr = address_res.find("p", class_="epc-address").get_text(separator="\n").strip() + lines = addr.split("\n") + if len(lines) > 2: + address1 = lines[0] + address2 = lines[1] + postcode = lines[-1] + else: + address1 = lines[0] + address2 = "" + postcode = lines[-1] + resulting_data = { 'epc_certificate': rrn, 'current_epc_rating': current_rating.split(' ')[-6], @@ -594,6 +573,10 @@ class RetrieveFindMyEpc: **assessment_data, **low_carbon_energy_sources, "page_source": epc_page_source, + # Add in address a postcode from the page - covers use cases where we are given RRN + "address1": address1, + "address2": address2, + "postcode": postcode, } if return_page: From d20725d12b91a4fd0fd496139a2b7fa0ea0e0eaf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 26 
Nov 2025 21:42:09 +0000 Subject: [PATCH 077/202] adding tasks to engine --- backend/app/db/functions/tasks/Tasks.py | 13 ++++++++++++- backend/app/db/models/materials.py | 1 + backend/app/plan/router.py | 9 ++++----- backend/app/plan/utils.py | 13 ++++++++++--- backend/engine/engine.py | 24 ++++++++++++++---------- 5 files changed, 41 insertions(+), 19 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 7508ab2e..5a3ae699 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -51,7 +51,16 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE STATUS (in progress, complete, failed) # -------------------------------------------------------- - def update_subtask_status(self, subtask_id: UUID, status: str): + def update_subtask_status( + self, subtask_id: UUID, status: str, outputs=None + ): + """ + Update the status of a subtask, and recalculate the parent task progress. 
+ :param subtask_id: UUID of the subtask to update + :param status: New status (in progress, complete, failed) + :param outputs: Optional outputs to set + :return: + """ now = datetime.now(timezone.utc) with get_db_session() as session: @@ -71,6 +80,8 @@ class SubTaskInterface: subtask.status = normalized subtask.updated_at = now + if outputs is not None: + subtask.outputs = json.dumps(outputs) session.add(subtask) session.commit() diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 9b38addd..99759438 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -20,6 +20,7 @@ class MaterialType(enum.Enum): room_roof_insulation = "room_roof_insulation" windows_glazing = "windows_glazing" secondary_glazing = "secondary_glazing" + double_glazing = "double_glazing" cavity_wall_extraction = "cavity_wall_extraction" iwi_wall_demolition = "iwi_wall_demolition" diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 2b2306ee..d143dc95 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -15,6 +15,7 @@ from utils.logger import setup_logger from backend.app.db.connection import db_engine from backend.app.db.functions.recommendations_functions import create_scenario +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface logger = setup_logger() @@ -81,10 +82,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id - # Create a task, and associated sub-tasks - from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface # Create a main task - task_id = TasksInterface.create_task( + task_id, _ = TasksInterface.create_task( task_source="backend/plan/router.py:trigger_plan_entrypoint", service="plan_engine", inputs=data, @@ -92,7 +91,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): ) subtask_interface = 
SubTaskInterface() - for i in range(total_chunks): # Create an entry in the request logs table index_start = i * chunk_size @@ -101,7 +99,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): message_payload = { **data, "index_start": index_start, "index_end": index_end, } - message_body = json.dumps(message_payload) # Create a subtask for this chunk subtask_id = subtask_interface.create_subtask( @@ -113,6 +110,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): message_payload["task_id"] = str(task_id) message_payload["subtask_id"] = str(subtask_id) + message_body = json.dumps(message_payload) + response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index ea328d5b..3f2e0e1f 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,8 +1,10 @@ import msgpack +from uuid import UUID +from typing import Any from utils.s3 import read_from_s3 from backend.app.config import get_settings from backend.app.plan.data_classes import PropertyRequestData -from typing import Any +from backend.app.db.functions.tasks.Tasks import SubTaskInterface from starlette.responses import Response from utils.logger import setup_logger @@ -64,7 +66,7 @@ def extract_property_request_data( x for x in already_installed if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) ), []) - + # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False @@ -211,8 +213,13 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], return measures, mapped["target_sap"], mapped["plan_type"], already_installed -def handle_error(session, msg, status=500): +def handle_error(session, msg, e, subtask_id, status=500): # When the pipeline fails, 
handles error process + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="failed", + outputs=str(e) + ) logger.error(msg, exc_info=True) session.rollback() return Response(status_code=status, content=msg) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index a7743b90..a172972a 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -430,6 +430,9 @@ async def model_engine(body: PlanTriggerRequest): ) # Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remove UPRN # This will be reflexted + if "estimated" not in plan_input.columns: + plan_input["estimated"] = False + plan_input["uprn"] = np.where( plan_input["estimated"].isin([1, True]) & ( (plan_input["uprn"] < 0) | pd.isnull(plan_input["uprn"]) @@ -527,7 +530,7 @@ async def model_engine(body: PlanTriggerRequest): address1 = config.get("address", None) # Handle domna address list format if pd.isnull(address1) and body.file_format == "domna_asset_list": - address1 = config.get("domna_full_address", None) + address1 = config.get("domna_address_1", None) address1 = str(int(address1)) if isinstance(address1, float) else str(address1) full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None @@ -1179,8 +1182,9 @@ async def model_engine(body: PlanTriggerRequest): ) db_funcs.property_functions.create_property_details_epc(session, property_details_epc) - db_funcs.property_functions.update_or_create_property_spatial_details(session, p.uprn, - p.spatial) + db_funcs.property_functions.update_or_create_property_spatial_details( + session, p.uprn, p.spatial + ) property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) @@ -1262,14 +1266,14 @@ async def model_engine(body: PlanTriggerRequest): # Commit final changes session.commit() - except IntegrityError: - return handle_error(session, "Database integrity error.", 500) - except OperationalError: - return 
handle_error(session, "Database operational error.", 500) - except ValueError: - return handle_error(session, "Bad request: malformed data.", 400) + except IntegrityError as e: + return handle_error(session, "Database integrity error.", e, body.subtask_id, 500) + except OperationalError as e: + return handle_error(session, "Database operational error.", e, body.subtask_id, 500) + except ValueError as e: + return handle_error(session, "Bad request: malformed data.", e, body.subtask_id, 400) except Exception as e: # General exception handling - return handle_error(session, "An unexpected error occurred.", 500) + return handle_error(session, "An unexpected error occurred.", e, body.subtask_id, 500) finally: session.close() From f91ad4c3f12d9b1a9c0dafd93336c2a02432eeb8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 27 Nov 2025 11:25:36 +0000 Subject: [PATCH 078/202] implemented tasks/subtasks for the first time --- backend/app/db/functions/tasks/Tasks.py | 7 +++++-- backend/engine/engine.py | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 5a3ae699..12a2e51b 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -25,17 +25,20 @@ class SubTaskInterface: # -------------------------------------------------------- # CREATE SUBTASK # -------------------------------------------------------- - def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): + def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None): with get_db_session() as session: task = session.get(Task, task_id) if not task: raise ValueError(f"Task {task_id} not found") + # We treat waiting as the default status + status = "waiting" if status is None else status + subtask = SubTask( task_id=task_id, inputs=json.dumps(inputs) if inputs else None, - status="waiting", + 
status=status, job_started=None, job_completed=None, ) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index a172972a..be770d8e 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -5,6 +5,7 @@ from datetime import datetime from tqdm import tqdm import pandas as pd import numpy as np +from uuid import UUID from backend.Funding import Funding from backend.SearchEpc import SearchEpc @@ -17,6 +18,7 @@ from starlette.responses import Response from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine import backend.app.db.functions as db_funcs +from backend.app.db.functions.tasks.Tasks import SubTaskInterface from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES @@ -1277,6 +1279,9 @@ async def model_engine(body: PlanTriggerRequest): finally: session.close() + # Mark the subtask as successful + SubTaskInterface().update_subtask_status(subtask_id=UUID(body.subtask_id), status="failed") + logger.info("Model Engine completed successfully") return Response(status_code=200) From 3933942cd8e1c5de27ee6b27176c3aa6b1caf855 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 27 Nov 2025 12:51:25 +0000 Subject: [PATCH 079/202] added cloudwatch url --- backend/app/db/functions/tasks/Tasks.py | 6 +++- backend/app/plan/utils.py | 34 +++++++++++++++++-- backend/engine/engine.py | 43 +++++++++++++++++-------- 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 12a2e51b..30acf370 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -55,13 +55,14 @@ class SubTaskInterface: # UPDATE STATUS (in progress, complete, failed) # -------------------------------------------------------- def update_subtask_status( - self, subtask_id: 
UUID, status: str, outputs=None + self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None ): """ Update the status of a subtask, and recalculate the parent task progress. :param subtask_id: UUID of the subtask to update :param status: New status (in progress, complete, failed) :param outputs: Optional outputs to set + :param cloud_logs_url: Optional cloud logs URL to set :return: """ now = datetime.now(timezone.utc) @@ -86,6 +87,9 @@ class SubTaskInterface: if outputs is not None: subtask.outputs = json.dumps(outputs) + if cloud_logs_url is not None: + subtask.cloud_logs_url = cloud_logs_url + session.add(subtask) session.commit() diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 3f2e0e1f..569eafd1 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,3 +1,5 @@ +import os +import time import msgpack from uuid import UUID from typing import Any @@ -213,12 +215,40 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], return measures, mapped["target_sap"], mapped["plan_type"], already_installed -def handle_error(session, msg, e, subtask_id, status=500): +def build_cloudwatch_log_url(start_ms: int, end_ms: int) -> str: + """ + Build a CloudWatch Logs URL for the current Lambda invocation, + including timestamp window from start_ms to end_ms (epoch ms). + """ + region = os.environ["AWS_REGION"] + log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"] + log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"] + + # CloudWatch console requires / encoded as $252F + encoded_group = log_group.replace("/", "$252F") + encoded_stream = log_stream.replace("/", "$252F") + + # Return the full URL with time range + return ( + f"https://console.aws.amazon.com/cloudwatch/home?" 
+ f"region={region}" + f"#logsV2:log-groups/log-group/{encoded_group}" + f"/log-events/{encoded_stream}" + f"$3Fstart={start_ms}" + f"$26end={end_ms}" + ) + + +def handle_error(session, msg, e, subtask_id, status=500, start_ms=None): # When the pipeline fails, handles error process + end_ms = int(time.time() * 1000) + cloud_logs_url = build_cloudwatch_log_url(start_ms, end_ms) + SubTaskInterface().update_subtask_status( subtask_id=UUID(subtask_id), status="failed", - outputs=str(e) + outputs=str(e), + cloud_logs_url=cloud_logs_url ) logger.error(msg, exc_info=True) session.rollback() diff --git a/backend/engine/engine.py b/backend/engine/engine.py index be770d8e..ce0505e2 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1,3 +1,5 @@ +import os +import time import json from copy import deepcopy from datetime import datetime @@ -23,7 +25,7 @@ from backend.app.db.functions.tasks.Tasks import SubTaskInterface from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES from backend.app.plan.utils import ( - get_cleaned, patch_epc, extract_property_request_data, parse_eco_packages, handle_error + get_cleaned, patch_epc, extract_property_request_data, parse_eco_packages, handle_error, build_cloudwatch_log_url ) from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions @@ -409,6 +411,7 @@ async def model_engine(body: PlanTriggerRequest): logger.info("Connecting to db") session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() + start_ms = int(time.time() * 1000) # TODO: if the measure is already installed, it should actually be the very first phase @@ -619,19 +622,28 @@ async def model_engine(body: PlanTriggerRequest): # if we have a remote assment data type, we pull the additional data and include it epc_page_source = {} if (body.event_type == "remote_assessment") and not 
(epc_searcher.newest_epc.get("estimated")): - logger.info("Retrieving find my epc data") try: property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( epc_searcher.newest_epc, epc_page, rrn=rrn ) except Exception as e: logger.error(f"Failed to retrieve without cleaning address {e}") - for k in ["address", "address1"]: - epc_searcher.newest_epc[k] = epc_searcher.address_clean - property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc, epc_page, rrn=rrn - ) - + try: + epc_to_use = deepcopy(epc_searcher.newest_epc) + for k in ["address", "address1"]: + epc_to_use[k] = epc_searcher.address_clean + property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( + epc_to_use, epc_page, rrn=rrn + ) + except Exception as e: + # Final attempt + logger.error(f"Failed to retrieve without cleaning address {e}") + epc_to_use = deepcopy(epc_searcher.newest_epc) + for k in ["address", "address1"]: + epc_to_use[k] = config["address"] + property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( + epc_to_use, epc_page, rrn=rrn + ) # If we have a property type, this means when we pull the epc data, we might need to make a patch epc_records = patch_epc(patch, epc_records) @@ -1267,20 +1279,23 @@ async def model_engine(body: PlanTriggerRequest): # Commit final changes session.commit() - except IntegrityError as e: - return handle_error(session, "Database integrity error.", e, body.subtask_id, 500) + return handle_error(session, "Database integrity error.", e, body.subtask_id, 500, start_ms) except OperationalError as e: - return handle_error(session, "Database operational error.", e, body.subtask_id, 500) + return handle_error(session, "Database operational error.", e, body.subtask_id, 500, start_ms) except ValueError as e: - return handle_error(session, "Bad request: malformed data.", e, body.subtask_id, 400) + 
return handle_error(session, "Bad request: malformed data.", e, body.subtask_id, 400, start_ms) except Exception as e: # General exception handling - return handle_error(session, "An unexpected error occurred.", e, body.subtask_id, 500) + return handle_error(session, "An unexpected error occurred.", e, body.subtask_id, 500, start_ms) finally: session.close() + end_ms = int(time.time() * 1000) + cloud_logs_url = build_cloudwatch_log_url(start_ms, end_ms) # Mark the subtask as successful - SubTaskInterface().update_subtask_status(subtask_id=UUID(body.subtask_id), status="failed") + SubTaskInterface().update_subtask_status( + subtask_id=UUID(body.subtask_id), status="complete", cloud_logs_url=cloud_logs_url + ) logger.info("Model Engine completed successfully") From 133a1255ebf0a98d9029997ef0f29191fcec6ddb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 27 Nov 2025 17:50:26 +0000 Subject: [PATCH 080/202] fixed missing task and subtask for single remote assessments --- BaseUtility.py | 1 - backend/Property.py | 54 +++++++++---------- backend/app/plan/router.py | 8 +++ backend/app/plan/utils.py | 3 +- backend/engine/engine.py | 3 +- etl/epc/settings.py | 2 + .../epc_attributes/FloorAttributes.py | 11 ++-- .../epc_attributes/MainheatAttributes.py | 2 +- .../test_mainheat_attributes_cases.py | 17 +++++- 9 files changed, 62 insertions(+), 39 deletions(-) diff --git a/BaseUtility.py b/BaseUtility.py index 1a31c5d0..fb5d3d67 100644 --- a/BaseUtility.py +++ b/BaseUtility.py @@ -1,5 +1,4 @@ from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches -from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches class Definitions: diff --git a/backend/Property.py b/backend/Property.py index d0d85565..c01e4353 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -592,13 +592,21 @@ class Property: :return: """ - if not cleaned: - raise ValueError("Cleaner does not contain cleaned data") + # if not cleaned: + # raise ValueError("Cleaner 
does not contain cleaned data") if not self.data: raise ValueError("Property does not contain data") - for description, attribute in cleaned.items(): + components = [ + 'floor-description', 'hotwater-description', 'main-fuel', 'mainheat-description', + 'mainheatcont-description', 'roof-description', 'walls-description', 'windows-description', + 'lighting-description' + ] + + for description in components: + + cleaner_cls = all_cleaner_map[description] if self.data[description] in self.DATA_ANOMALY_MATCHES: template = cleaned[description][0] @@ -616,35 +624,22 @@ class Property: ) continue - attributes = [ - x - for x in cleaned[description] - if x["original_description"] == self.data[description] - ] + if description == "lighting-description": + cleaner_cls = cleaner_cls(self.data[description], averages=None) + else: + cleaner_cls = cleaner_cls(self.data[description]) - if len(attributes) > 1: - raise ValueError( - "Either No attributes or multiple found for %s" % description + processed = { + "original_description": self.data[description], + "clean_description": cleaner_cls.description.replace( + "(assumed)", "" ) + .rstrip() + .capitalize(), + **cleaner_cls.process(), + } - if len(attributes) == 0: - # We attempt to perform the clean on the fly - cleaner_cls = all_cleaner_map[description] - if description == "lighting-description": - cleaner_cls = cleaner_cls(self.data[description], averages=None) - else: - cleaner_cls = cleaner_cls(self.data[description]) - processed = { - "original_description": self.data[description], - "clean_description": cleaner_cls.description.replace( - "(assumed)", "" - ) - .rstrip() - .capitalize(), - **cleaner_cls.process(), - } - - attributes = [processed] + attributes = [processed] setattr(self, self.ATTRIBUTE_MAP[description], attributes[0]) @@ -1160,6 +1155,7 @@ class Property: 'has_community_scheme': 'Varied (Community Scheme)', "has_dual_fuel_mineral_and_wood": 'Wood Logs', "has_electricaire": 'Electricity', + "has_wood_chips": 
'Wood Logs' } # Hot water diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index d143dc95..5611a53d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -129,6 +129,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): else: # Fallback: Just send a single message try: + task_id, subtask_id = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_plan_entrypoint", + service="plan_engine", + inputs=data, + task_only=False + ) + data["task_id"] = task_id + data["subtask_id"] = subtask_id message_body = json.dumps(data) response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 569eafd1..c18968f9 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -215,7 +215,7 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], return measures, mapped["target_sap"], mapped["plan_type"], already_installed -def build_cloudwatch_log_url(start_ms: int, end_ms: int) -> str: +def build_cloudwatch_log_url(start_ms: int) -> str: """ Build a CloudWatch Logs URL for the current Lambda invocation, including timestamp window from start_ms to end_ms (epoch ms). 
@@ -235,7 +235,6 @@ def build_cloudwatch_log_url(start_ms: int, end_ms: int) -> str: f"#logsV2:log-groups/log-group/{encoded_group}" f"/log-events/{encoded_stream}" f"$3Fstart={start_ms}" - f"$26end={end_ms}" ) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index ce0505e2..ebb0a6b8 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1290,8 +1290,7 @@ async def model_engine(body: PlanTriggerRequest): finally: session.close() - end_ms = int(time.time() * 1000) - cloud_logs_url = build_cloudwatch_log_url(start_ms, end_ms) + cloud_logs_url = build_cloudwatch_log_url(start_ms) # Mark the subtask as successful SubTaskInterface().update_subtask_status( subtask_id=UUID(body.subtask_id), status="complete", cloud_logs_url=cloud_logs_url diff --git a/etl/epc/settings.py b/etl/epc/settings.py index 16619fa2..d453080e 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -51,6 +51,8 @@ DATA_ANOMALY_MATCHES = { "UNKNOWN", # "Unknown", + # Observed error case + "(error), (error)", } # Add the post_sap10 date to indicate if the epc is post sap10 diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 23c7dd8e..62767638 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -55,6 +55,7 @@ class FloorAttributes(Definitions): or (description in self.DATA_ANOMALY_MATCHES) or (description in self.OBSERVED_ERRORS) or (self.description == "sap05:floor") + or not self.description ) # Try and perform a translation, incase it's in welsh @@ -63,8 +64,8 @@ class FloorAttributes(Definitions): if not self.nodata and not any( rt in self.description for rt in self.FLOOR_TYPES - + self.DWELLING_BELOW - + ["average thermal transmittance"] + + self.DWELLING_BELOW + + ["average thermal transmittance"] ): raise ValueError("Invalid description") @@ -97,7 +98,11 @@ class FloorAttributes(Definitions): def process(self) -> Dict[str, 
Union[str, bool, int, None]]: if self.nodata: - return {"no_data": True} + return { + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, + 'another_property_below': False, 'insulation_thickness': 'none', 'no_data': True + } result: Dict[str, Union[float, str, bool, None]] = {} description = self.description diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index 312fa9fe..d20d9290 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -20,7 +20,7 @@ class MainHeatAttributes(Definitions): ] FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite", "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k", "mineral and wood", - "dual fuel appliance"] + "dual fuel appliance", "wood chips"] DISTRIBUTION_SYSTEMS = ["radiators", "fan coil units", "pipes in screed above insulation", "pipes in insulated timber floor", "pipes in concrete slab"] OTHERS = ["assumed", "electricaire", "assumed for most rooms"] diff --git a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py index 45994b1d..e1939a7d 100644 --- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py @@ -1752,6 +1752,21 @@ mainheat_cases = [ 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_mineral_and_wood': False, 'has_dual_fuel_appliance': False, 'has_assumed': False, 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False - + }, + { + 'original_description': 'Boiler and radiators, wood chips', + 'has_radiators': True, 'has_fan_coil_units': 
False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False, + 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False, + 'has_community_heat_pump': False, 'has_hot-water-only': False, 'has_electric': False, 'has_mains_gas': False, + 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, + 'has_mineral_and_wood': False, 'has_dual_fuel_appliance': False, 'has_wood_chips': True, 'has_assumed': False, + 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False } + ] From c19232c33e3eee9ca82c72a252038a46cb3acdeb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 27 Nov 2025 17:52:11 +0000 Subject: [PATCH 081/202] adding back in cleaned check --- backend/Property.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index c01e4353..58909c40 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -592,8 +592,8 @@ class Property: :return: """ - # if not cleaned: - # raise ValueError("Cleaner does not contain cleaned data") + if not cleaned: + raise ValueError("Cleaner does not contain cleaned data") if not self.data: raise ValueError("Property does not contain data") From cfc7f2a247e4f840d3156c3f98b49526cc47c662 Mon Sep 17 
00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 06:20:47 +0000 Subject: [PATCH 082/202] Debugging epc clean for new data and new error cases --- backend/app/db/functions/tasks/Tasks.py | 6 ++++-- etl/epc/settings.py | 11 ++++++++++- .../epc_attributes/MainheatControlAttributes.py | 8 +++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 30acf370..4aa7fb43 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -27,6 +27,7 @@ class SubTaskInterface: # -------------------------------------------------------- def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None): + now = datetime.now(timezone.utc) with get_db_session() as session: task = session.get(Task, task_id) if not task: @@ -39,7 +40,7 @@ class SubTaskInterface: task_id=task_id, inputs=json.dumps(inputs) if inputs else None, status=status, - job_started=None, + job_started=now, job_completed=None, ) @@ -271,12 +272,13 @@ class TasksInterface: :param task_only: If True, only create the Task record, without a SubTask :return: """ + now = datetime.now(timezone.utc) with get_db_session() as session: task = Task( task_source=task_source, service=service, status="waiting", - job_started=None, + job_started=now, job_completed=None, ) diff --git a/etl/epc/settings.py b/etl/epc/settings.py index d453080e..f4d0e174 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -51,8 +51,17 @@ DATA_ANOMALY_MATCHES = { "UNKNOWN", # "Unknown", - # Observed error case + # Observed error cases "(error), (error)", + "error , error", + "Description", + "description", + "Undefined Welsh description for crtrl code 2113", + "undefined welsh description for crtrl code 2113", + "Hot water system", + "hot water system", + "Heating system", + "heating system", } # Add the post_sap10 date to indicate if the epc is post sap10 diff 
--git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py index 0dcf97c5..3e2df543 100644 --- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py @@ -119,7 +119,13 @@ class MainheatControlAttributes(Definitions): 'rheoli r tal a llaw': 'manual charge control', 'tal un gyfradd, thermostat ystafell yn unig': 'flat rate charging, room thermostat only', "rheoli'r t l llaw": "manual charge control", - "2205 rhaglennydd ac o leiaf ddau thermostat ystafell": "programmer and at least two room thermostats" + "2205 rhaglennydd ac o leiaf ddau thermostat ystafell": "programmer and at least two room thermostats", + "2603 rhaglennydd a thermostatau ar y cyfarpar": "programmer, room thermostat", + "2404 rheolyddion i wresogyddion storio sygçön cadw llawer o wres": "controls for high heat retention storage " + "heaters", + 'system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd ac o leiaf ddau thermostat ' + 'ystafell': 'charging system linked to use of community heating, programmer and at least two room thermostats' + } NO_DATA_DESCRIPTIONS = [ From c400a67bf6b7cd72af13785a28361ebb732535aa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 06:47:57 +0000 Subject: [PATCH 083/202] debugging wood chips fuel types --- backend/app/assumptions.py | 2 ++ etl/epc/Dataset.py | 53 +++++++++++++++--------------- etl/epc/Record.py | 4 ++- recommendations/Recommendations.py | 2 +- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index bc8f9cc6..492b9042 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -86,6 +86,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Electric ceiling 
heating": {"fuel": "Electricity", "cop": 1}, + "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, + "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 62825cc5..3fa84136 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -212,11 +212,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ @@ -394,12 +394,13 @@ class TrainingDataset(BaseDataset): axis=1, ) - roof_starting_uvalue = self.df["roof_thermal_transmittance"].fillna( - roof_starting_uvalue - ) - roof_ending_uvalue = self.df["roof_thermal_transmittance_ending"].fillna( - roof_ending_uvalue - ) + roof_starting_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance"], errors="coerce" + ).fillna(roof_starting_uvalue) + + roof_ending_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance_ending"], errors="coerce" + ).fillna(roof_ending_uvalue) # ~~~~~~~~~~~~~~~~~~ # Floor @@ -459,20 +460,20 @@ class TrainingDataset(BaseDataset): axis=1, ) - floor_starting_uvalue = self.df["floor_thermal_transmittance"].fillna( - floor_starting_uvalue - ) - floor_ending_uvalue = self.df["floor_thermal_transmittance_ending"].fillna( - floor_ending_uvalue - ) + floor_starting_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance"], errors="coerce" + ).fillna(floor_starting_uvalue) + floor_ending_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance_ending"], errors="coerce" + ).fillna(floor_ending_uvalue) for component in ["walls", "roof", "floor"]: - 
self.df[f"{component}_thermal_transmittance"] = self.df[ - f"{component}_thermal_transmittance" - ].fillna(eval(f"{component}_starting_uvalue")) - self.df[f"{component}_thermal_transmittance_ending"] = self.df[ - f"{component}_thermal_transmittance_ending" - ].fillna(eval(f"{component}_ending_uvalue")) + self.df[f"{component}_thermal_transmittance"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance"], errors="coerce" + ).fillna(eval(f"{component}_starting_uvalue")) + self.df[f"{component}_thermal_transmittance_ending"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance_ending"], errors="coerce" + ).fillna(eval(f"{component}_ending_uvalue")) self.df = self.df.drop( columns=[ @@ -521,7 +522,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"] ) - ] + ] elif component == "floor": expanded_df = expanded_df[ (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) @@ -538,7 +539,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"] ) - ] + ] elif component == "roof": expanded_df = expanded_df[ (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) @@ -551,7 +552,7 @@ class TrainingDataset(BaseDataset): expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"] ) - ] + ] return expanded_df diff --git a/etl/epc/Record.py b/etl/epc/Record.py index ec4dad96..c1c3ff67 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -38,6 +38,8 @@ DATA_BUCKET = os.environ.get( "DATA_BUCKET", "retrofit-data-dev" if ENVIRONMENT == "dev" else None ) +pd.set_option("future.no_silent_downcasting", True) + @dataclass class EPCRecord: @@ -392,7 +394,7 @@ class EPCRecord: floor_height_data = self.cleaning_data[ (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) & (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) - ] + ] average = 
floor_height_data["floor_height"].mean() sd = floor_height_data["floor_height"].std() # If we're in the top 0.5 percentile of floor heights, we'll set it to the average diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 20f0c760..5d97a554 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -744,7 +744,7 @@ class Recommendations: # fairly regularly. A task has been added to planner to refactor this # We have observed an edge case where the fuel is described as not being community # but the hot water is. We handle as such - logger.warning("Hot water description not mapped: %s", heating_description) + logger.warning("Hot water description not mapped: %s", hotwater_description) mapped_hotwater = {"fuel": 'Unmapped', "cop": 0.9} return { From e29e96e6177c57aff7ae661bd399f76f4357c7ef Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 07:48:52 +0000 Subject: [PATCH 084/202] fixed errors with logs url --- backend/app/plan/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index c18968f9..d8c54963 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -240,8 +240,7 @@ def build_cloudwatch_log_url(start_ms: int) -> str: def handle_error(session, msg, e, subtask_id, status=500, start_ms=None): # When the pipeline fails, handles error process - end_ms = int(time.time() * 1000) - cloud_logs_url = build_cloudwatch_log_url(start_ms, end_ms) + cloud_logs_url = build_cloudwatch_log_url(start_ms) SubTaskInterface().update_subtask_status( subtask_id=UUID(subtask_id), From 54b472dd473742171ccd20ffdc31bfdf58c0ca17 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 08:03:37 +0000 Subject: [PATCH 085/202] fixed cloud logs error --- backend/app/db/functions/address_functions.py | 3 +++ backend/engine/engine.py | 23 +++++++++++++++---- 2 files changed, 22 
insertions(+), 4 deletions(-) diff --git a/backend/app/db/functions/address_functions.py b/backend/app/db/functions/address_functions.py index 34dc48c7..b04f14c9 100644 --- a/backend/app/db/functions/address_functions.py +++ b/backend/app/db/functions/address_functions.py @@ -40,6 +40,9 @@ def get_associated_uprns(session: Session, postcode: str, uprn: str): .filter(func.upper(PostcodeSearch.postcode) == postcode) .first() ) + if not record: + # No record found for this postcode + return [] matched_record = _get_associated_records(results=record.result_data["results"], uprn=uprn) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index ebb0a6b8..e9cb86ea 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -449,6 +449,10 @@ async def model_engine(body: PlanTriggerRequest): plan_input["built_form"] = plan_input["landlord_built_form"].copy() else: plan_input["built_form"] = None + + if "epc_property_type" not in plan_input.columns: + plan_input["epc_property_type"] = None + plan_input["property_type"] = np.where( plan_input["property_type"] == "unknown", plan_input["epc_property_type"], @@ -468,7 +472,7 @@ async def model_engine(body: PlanTriggerRequest): "bungalow": "Bungalow", "block house": "House", "coach house": "House", - "bedsit": "Flat" + "bedsit": "Flat", } built_form_map = { @@ -480,8 +484,19 @@ async def model_engine(body: PlanTriggerRequest): "enclosed mid-terrace": "Enclosed Mid-Terrace", } # We remap the values to match the EPC expected formats - plan_input["property_type"] = plan_input["property_type"].map(property_type_map) - plan_input["built_form"] = plan_input["built_form"].map(built_form_map) + + # This syntax will actually retain any original values, if they don't get mapped + plan_input["property_type"] = ( + plan_input["property_type"] + .map(property_type_map) + .fillna(plan_input["property_type"]) + ) + + plan_input["built_form"] = ( + plan_input["built_form"] + .map(built_form_map) + 
.fillna(plan_input["built_form"]) + ) plan_input = plan_input.to_dict("records") @@ -669,7 +684,7 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=req_data.already_installed + eco_packages[property_id][3], + already_installed=req_data.already_installed + eco_packages.get(property_id)[3], property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, From ba230a2ff8f23809f9c86aa20324da7d98cab0d6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 12:43:02 +0000 Subject: [PATCH 086/202] stringify task id to fix bugs --- backend/Property.py | 52 ++++++++++-------- backend/app/assumptions.py | 1 + backend/app/plan/router.py | 4 +- .../Nov 2025 Consulting Project/data_prep.py | 53 ++++++++++++++++++- 4 files changed, 83 insertions(+), 27 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 58909c40..6148b40a 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -598,15 +598,7 @@ class Property: if not self.data: raise ValueError("Property does not contain data") - components = [ - 'floor-description', 'hotwater-description', 'main-fuel', 'mainheat-description', - 'mainheatcont-description', 'roof-description', 'walls-description', 'windows-description', - 'lighting-description' - ] - - for description in components: - - cleaner_cls = all_cleaner_map[description] + for description, attribute in cleaned.items(): if self.data[description] in self.DATA_ANOMALY_MATCHES: template = cleaned[description][0] @@ -624,22 +616,35 @@ class Property: ) continue - if description == "lighting-description": - cleaner_cls = cleaner_cls(self.data[description], averages=None) - else: - cleaner_cls = cleaner_cls(self.data[description]) + attributes = [ + x + for x in cleaned[description] + if x["original_description"] == 
self.data[description] + ] - processed = { - "original_description": self.data[description], - "clean_description": cleaner_cls.description.replace( - "(assumed)", "" + if len(attributes) > 1: + raise ValueError( + "Either No attributes or multiple found for %s" % description ) - .rstrip() - .capitalize(), - **cleaner_cls.process(), - } - attributes = [processed] + if len(attributes) == 0: + # We attempt to perform the clean on the fly + cleaner_cls = all_cleaner_map[description] + if description == "lighting-description": + cleaner_cls = cleaner_cls(self.data[description], averages=None) + else: + cleaner_cls = cleaner_cls(self.data[description]) + processed = { + "original_description": self.data[description], + "clean_description": cleaner_cls.description.replace( + "(assumed)", "" + ) + .rstrip() + .capitalize(), + **cleaner_cls.process(), + } + + attributes = [processed] setattr(self, self.ATTRIBUTE_MAP[description], attributes[0]) @@ -1340,7 +1345,8 @@ class Property: # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain remap_fuel_sources = [ "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel", - "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal" + "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal", + "Natural Gas (Community Scheme)" ] heating_energy_source = self.heating_energy_source diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 492b9042..1c46a5c8 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -88,6 +88,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Electric ceiling heating": {"fuel": "Electricity", "cop": 1}, "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, + "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # 
These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 5611a53d..b53da6c6 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -135,8 +135,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): inputs=data, task_only=False ) - data["task_id"] = task_id - data["subtask_id"] = subtask_id + data["task_id"] = str(task_id) + data["subtask_id"] = str(subtask_id) message_body = json.dumps(data) response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index 77736aff..e05d82e4 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -214,14 +214,26 @@ archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80] archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1 archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str) +# For the sample, look for invalid looking UPRNs and remove them. 
+sample_from = sustainability_data.copy() +# 1) Check for UPRNs that are not numeric or begin with a Zero +sample_from["uprn_not_numeric"] = ~sample_from["UPRN"].apply(lambda x: str(x).isnumeric()) +sample_from = sample_from[~sample_from["uprn_not_numeric"]] + +sample_from["uprn_has_leading_zero"] = sample_from["UPRN"].apply(lambda x: str(x).startswith("0")) +sample_from = sample_from[~sample_from["uprn_has_leading_zero"]] + +sample_from = sample_from[~pd.isnull(sample_from["UPRN"])] + # We now take a sample of the properties that represent 85% of the total properties -sustainability_data = sustainability_data.merge( +sample_from = sample_from.merge( archetypes_85, on=archetype_variables, how="inner" ) + # We take 1 random property, by archetype 85 reference -modelling_sample = sustainability_data.groupby("Archetypes_85_reference").apply( +modelling_sample = sample_from.groupby("Archetypes_85_reference").apply( lambda x: x.sample(1, random_state=42) ).reset_index(drop=True) @@ -238,6 +250,43 @@ for col in archetype_variables: print(f"--- {col} ---") print(compare_distributions(sustainability_data, modelling_sample, col)) +# prepare +modelling_sample["domna_property_id"] = modelling_sample.index + 1 +# Rename +modelling_sample = modelling_sample.rename( + columns={ + "Org Ref": "landlord_property_id", "Address 1": "domna_address_1", + "Postcode": "postcode", "Type": "landlord_property_type", + "Attachment": "landlord_built_form", + "Heating": "landlord_heating_system", + "UPRN": "epc_os_uprn" + } +) + +modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map( + { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "SemiDetached": "Semi-Detached", + "Detached": "Detached", + "EnclosedEndTerrace": "Enclosed End-Terrace", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + } +) + +if pd.isnull(modelling_sample["domna_built_form"]).sum(): + raise ValueError("Some built forms are null after mapping") + + +# Placeholder copies +def 
make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + +modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_full_address(x), axis=1) + # Save this CSV as input modelling_sample.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx", From 1e9aa98ffc127e9bbef798ec0164e9d2568f04b9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 17:02:32 +0000 Subject: [PATCH 087/202] debugging fuel types and a zero floor area property --- backend/Property.py | 10 +++++++++- etl/epc/Record.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/backend/Property.py b/backend/Property.py index 6148b40a..cbc762e6 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1212,9 +1212,17 @@ class Property: else: self.heating_energy_source = ['Wood Logs'] - if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1: + if len(self.heating_energy_source) == 0 or ( + len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source + ): + # We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)'] + # so we treat this as community heating raise Exception("Investigate me") + if len(self.heating_energy_source) > 1: + # We treat this as a community scheme + self.heating_energy_source = ["Varied (Community Scheme)"] + self.heating_energy_source = self.heating_energy_source[0] if self.heating_energy_source == "Varied (Community Scheme)": diff --git a/etl/epc/Record.py b/etl/epc/Record.py index c1c3ff67..e1853361 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -596,6 +596,9 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") + if 
self.prepared_epc["total-floor-area"] is None: + return + self.prepared_epc["total-floor-area"] = float( self.prepared_epc["total-floor-area"] ) From 1f267e7d47322dfae0d14d6e94e9e68911454c37 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 18:15:56 +0000 Subject: [PATCH 088/202] debugging for peabody - edge case properties and added placeholder averages cleaning --- backend/Property.py | 40 ++++++++++++++++++------ backend/engine/engine.py | 67 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 93 insertions(+), 14 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index cbc762e6..ae79f250 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1192,6 +1192,14 @@ class Property: 'oil range cooker': 'Oil' } + fuel_map = { + None: "Natural Gas (Community Scheme)", + "mains gas": "Natural Gas (Community Scheme)", + "biomass": "Smokeless Fuel", + "electricity": "Electricity", + "biogas": "Smokeless Fuel", + } + self.heating_energy_source = list({ fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False) }) @@ -1212,13 +1220,31 @@ class Property: else: self.heating_energy_source = ['Wood Logs'] - if len(self.heating_energy_source) == 0 or ( - len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source - ): + if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source: # We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)'] # so we treat this as community heating raise Exception("Investigate me") + if len(self.heating_energy_source) == 0: + heating_flags = { + v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"] + } + hotwater_flags = { + v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"] + } + + # If all flags are zero, we have a no data example + if 
(heating_flags == {False} or hotwater_flags == {None}) and ( + hotwater_flags == {False} or hotwater_flags == {None}): + # We have nodata so we try and rely on main fuel + if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown + mapped_fuel = fuel_map[self.main_fuel["fuel_type"]] + self.heating_energy_source = mapped_fuel + self.hot_water_energy_source = mapped_fuel + return + else: + raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") + if len(self.heating_energy_source) > 1: # We treat this as a community scheme self.heating_energy_source = ["Varied (Community Scheme)"] @@ -1226,13 +1252,7 @@ class Property: self.heating_energy_source = self.heating_energy_source[0] if self.heating_energy_source == "Varied (Community Scheme)": - fuel_map = { - None: "Natural Gas (Community Scheme)", - "mains gas": "Natural Gas (Community Scheme)", - "biomass": "Smokeless Fuel", - "electricity": "Electricity", - "biogas": "Smokeless Fuel", - } + if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] else: diff --git a/backend/engine/engine.py b/backend/engine/engine.py index e9cb86ea..217be3c3 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -405,6 +405,65 @@ def check_duplicate_uprns(plan_input): return True +def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): + """ + Placeholder cleaning function to handle edge cases where we have missing data for + number of habitable rooms, number of heated rooms and floor height. 
We take the median + This need was born out of the Peabody project + :param prepared_epc: + :param cleaning_data: + :return: + """ + + if not pd.isnull(prepared_epc.prepared_epc["number_habitable_rooms"]) and not pd.isnull( + prepared_epc.prepared_epc["number_heated_rooms"]) and not pd.isnull(prepared_epc.prepared_epc["floor_height"]): + # Nothing to do + return prepared_epc + + # Clean with cleaning_data + clean_with = cleaning_data[ + (cleaning_data["property_type"] == prepared_epc.prepared_epc["property_type"]) & + (cleaning_data["property_type"] == prepared_epc.prepared_epc["property_type"]) + ] + if prepared_epc.prepared_epc["local_authority"] in clean_with["local_authority"].values: + clean_with = clean_with[ + clean_with["local_authority"] == prepared_epc.prepared_epc["local_authority"] + ] + + floor_area_clean_with = clean_with[ + (clean_with["total_floor_area"] <= prepared_epc.prepared_epc["total_floor_area"] * 1.1) & + (clean_with["total_floor_area"] >= prepared_epc.prepared_epc["total_floor_area"] * 0.9) + ] + + if not floor_area_clean_with.empty: + clean_with = floor_area_clean_with + + clean_n_habitable_rooms = int(round(clean_with["number_habitable_rooms"].median())) + clean_n_heated_rooms = int(round(clean_with["number_heated_rooms"].median())) + if clean_n_heated_rooms > clean_n_habitable_rooms: + clean_n_heated_rooms = clean_n_habitable_rooms + + clean_floor_height = clean_with["floor_height"].median() + + # We now fill + if not pd.isnull(clean_n_habitable_rooms) and pd.isnull( + prepared_epc.prepared_epc["number_habitable_rooms"]): + prepared_epc.prepared_epc["number_habitable_rooms"] = clean_n_habitable_rooms + prepared_epc.number_habitable_rooms = clean_n_habitable_rooms + + if not pd.isnull(clean_n_heated_rooms) and pd.isnull( + prepared_epc.prepared_epc["number_heated_rooms"]): + prepared_epc.prepared_epc["number_heated_rooms"] = clean_n_heated_rooms + prepared_epc.number_heated_rooms = clean_n_heated_rooms + + if not 
pd.isnull(clean_floor_height) and pd.isnull( + prepared_epc.prepared_epc["floor_height"]): + prepared_epc.prepared_epc["floor_height"] = clean_floor_height + prepared_epc.floor_height = clean_floor_height + + return prepared_epc + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) @@ -669,6 +728,10 @@ async def model_engine(body: PlanTriggerRequest): cleaning_data=cleaning_data, ) + # TODO: This is a temp function to handle a specific edge case with Peabody. We should + # factor this into EPCRecord as part of the cleaning however we need some more testing + prepared_epc = averages_cleaning(prepared_epc, cleaning_data) + # If we have an ECO project, we parse the cavity/solar reasons eco_packages[property_id] = parse_eco_packages(config, prepared_epc) @@ -756,10 +819,6 @@ async def model_engine(body: PlanTriggerRequest): input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET) [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties] - # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour - # TODO: If we can't get high image quality, should we use the solar API? 
Maybe just for semi-detached units with - # extensions, since it doesn't seem to do a great job - logger.info("Performing solar analysis") ofgem_consumption_averages = read_dataframe_from_s3_parquet( From f791a5328d4ee43800378c9e6259491bc0542b70 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 18:59:52 +0000 Subject: [PATCH 089/202] removed potential wiping of task start timestamp --- backend/app/db/functions/tasks/Tasks.py | 5 ++--- backend/app/plan/router.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 4aa7fb43..d1ab9536 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -194,13 +194,12 @@ class SubTaskInterface: elif "in progress" in statuses: task.status = "in progress" - if task.job_started is None: - task.job_started = now + # if task.job_started is None: + # task.job_started = now else: # All waiting task.status = "waiting" - task.job_started = None task.job_completed = None task.updated_at = now diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index b53da6c6..9216eac0 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -133,7 +133,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): task_source="backend/plan/router.py:trigger_plan_entrypoint", service="plan_engine", inputs=data, - task_only=False + task_only=False, ) data["task_id"] = str(task_id) data["subtask_id"] = str(subtask_id) From a8417efda4edd4698d49f2b0af3e0966b35e1836 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 29 Nov 2025 08:19:24 +0000 Subject: [PATCH 090/202] handling of SAP05 records --- backend/Property.py | 2 + backend/SearchEpc.py | 73 ++++++++++++++++++++++++++-- backend/app/db/models/materials.py | 3 ++ backend/app/db/models/portfolio.py | 1 + backend/engine/engine.py | 49 +++++-------------- 
etl/find_my_epc/RetrieveFindMyEpc.py | 45 ++++++++++++++++- 6 files changed, 131 insertions(+), 42 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index ae79f250..bc5660e8 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -889,6 +889,8 @@ class Property: "current_energy_demand": self.current_energy_consumption, "current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, "estimated": self.data.get("estimated", False), + # We indicate if we've overwritten a SAP 05 EPC + "sap_05_overwritten": self.data.get("sap_05_overwritten", False), **self.current_energy_bill } diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index c47e82c4..61c9cc30 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -199,7 +199,7 @@ class SearchEpc: ) self.data = None - self.newest_epc = None + self.newest_epc = {} self.older_epcs = None self.full_sap_epc = None self.metadata = None @@ -214,6 +214,9 @@ class SearchEpc: self.property_type = property_type self.fast = fast + # By default, this is set to false. This flag indicates whether we should overwrite SAP 2005 entires. + self.overwrite_sap05 = False + def set_strict_property_type_search(self): """ This method sets the strict property type search flag to True. 
When this flag is set, the search will @@ -531,6 +534,9 @@ class SearchEpc: if uprns: uprn = uprns.pop() + # Convert to int + if not pd.isnull(uprn): + uprn = int(uprn) else: newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED uprn = hash(self.address1 + self.postcode) @@ -649,6 +655,11 @@ class SearchEpc: epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10)) ] + # Regardless of whether or not we exclude old, we drop any SAP05 entries, which will be problematic + # if we include them + if not epc_data.empty: + epc_data = epc_data[~epc_data["mainheat-description"].str.lower().str.contains("sap05:")] + if not epc_data.empty: # Further processing of the EPC data @@ -694,6 +705,18 @@ class SearchEpc: estimation_built_form = "End-Terraced" elif (built_form == "") or (pd.isnull(built_form)): estimation_built_form = epc_built_form + elif built_form == "Enclosed Mid-Terrace": + # We check if we have any enclosed and if not, we fall back to mid-terrace + if sum(epc_data["built-form"] == "Enclosed Mid-Terrace") > 0: + estimation_built_form = "Enclosed Mid-Terrace" + else: + estimation_built_form = "Mid-Terrace" + elif built_form == "Enclosed End-Terrace": + # An enclosed end terrace has three two external facing walls so we fall back to mid-terrace + if sum(epc_data["built-form"] == "Enclosed End-Terrace") > 0: + estimation_built_form = "Enclosed Mid-Terrace" + else: + estimation_built_form = "Mid-Terrace" else: estimation_built_form = built_form @@ -917,7 +940,7 @@ class SearchEpc: return agg[key].values[0] - def find_property(self, skip_os=False, api_data=None): + def find_property(self, skip_os=False, api_data=None, overwrite_sap05=False): """ This method will attempt to identify a property. It will, at first, use the EPC api to try and find the EPC for the property and the associated UPRN. 
If this fails, it will use the Ordnance Survey API to @@ -931,6 +954,10 @@ class SearchEpc: :param skip_os: If True, the ordnance survey api will be skipped and only the EPC api will be used :param api_data: If provided, this data will be used instead of querying the EPC api + :param overwrite_sap05: For extrememly old, SAP05 EPCs, we may wish to overwrite them with an estimated EPC. + This is because the SAP05 EPCs will have missing information such as the main heating + will be described as SAP05:Main-Heating, which isn't particularly useful for the + purpose of providing recommendations. """ # Step 1: use the epc api to find the property and uprn @@ -944,8 +971,22 @@ class SearchEpc: ( self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn ) = self.extract_epc_data(address=self.full_address) + + # Before we return, we check if we need to overwrite a SAP05 EPC + # If we have don't have SAP05 in the heating description and overwrite_sap05 is False, we return + is_sap_o5 = "SAP05:" in self.newest_epc.get("mainheat-description", "") + if ( + (not is_sap_o5) and (not overwrite_sap05) and (response["status"] == 200) + ): return + # By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC + lmks_to_drop, exclude_old = [], False + if is_sap_o5: + self.overwrite_sap05 = True + lmks_to_drop = [self.newest_epc["lmk-key"]] + exclude_old = True + # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn if skip_os: if self.ordnance_survey_client.property_type is not None: @@ -954,10 +995,18 @@ class SearchEpc: property_type=self.ordnance_survey_client.property_type, built_form=self.ordnance_survey_client.built_form, heating_system=self.heating_system, - associated_uprns=self.associated_uprns + associated_uprns=self.associated_uprns, + lmks_to_drop=lmks_to_drop, + exclude_old=exclude_old ) + + if self.overwrite_sap05: + # We keep a record of the fact that we have 
performed a SAP05 overwrite + estimated_epc["sap_05_overwritten"] = True + + # If we have overwritten a SAP05 EPC, we need to update older_epcs too + self.older_epcs = [] if not self.overwrite_sap05 else [self.newest_epc.copy()] self.newest_epc = estimated_epc - self.older_epcs = [] self.full_sap_epc = {} # Finally, set a standardised address 1 and postcode @@ -1000,6 +1049,20 @@ class SearchEpc: self.postcode_clean = self.ordnance_survey_client.postcode_os return + def set_uprn_source(self, file_format): + """ + Utility function to set the uprn source based on the file format. Only works for domna_asset_lists + and this is very much placeholder until we standardised our input data formats + :param file_format: + :return: + """ + + if not self.newest_epc: + raise ValueError("No EPC data available to set UPRN source - run find_property first") + + if self.newest_epc.get("estimated") and file_format == "domna_asset_list" and (self.newest_epc["uprn"] < 0): + self.newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED + def check_attribute_variations(self): attribute_map = { "walls-description": { @@ -1057,7 +1120,7 @@ class SearchEpc: return "ground" def get_metadata(self): - if self.newest_epc is None: + if not self.newest_epc: raise ValueError("No EPC data available") # We check if the property has ever been downgraded on SAP diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 99759438..8a524491 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -45,7 +45,10 @@ class MaterialType(enum.Enum): solar_pv = "solar_pv" solar_battery = "solar_battery" scaffolding = "scaffolding" + # Heating systems high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters" + air_soruce_heat_pump = "air_soruce_heat_pump" + boiler_upgrade = "boiler_upgrade" sealing_fireplace = "sealing_fireplace" roomstat_programmer_trvs = "roomstat_programmer_trvs" time_temperature_zone_control = 
"time_temperature_zone_control" diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index fbe9661b..7fec8c14 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -175,6 +175,7 @@ class PropertyDetailsEpcModel(Base): current_energy_demand = Column(Float) current_energy_demand_heating_hotwater = Column(Float) estimated = Column(Boolean, default=False) + sap_05_overwritten = Column(Boolean, default=False) # Include estimates for energy bills, across the different types of energy heating_cost_current = Column(Float) hot_water_cost_current = Column(Float) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 217be3c3..c0261e57 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -637,12 +637,8 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True, api_data=epc_api_data) - - if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list" and ( - epc_searcher.newest_epc["uprn"] < 0 - ): - epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED + epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) + epc_searcher.set_uprn_source(file_format=body.file_format) # We check for an energy assessment we have performed on this property: energy_assessment = db_funcs.energy_assessment_functions.get_latest_assessment_by_uprn( @@ -658,12 +654,6 @@ async def model_engine(body: PlanTriggerRequest): if not is_new and not body.multi_plan: continue - if epc_searcher.newest_epc is None: - raise ValueError( - "No EPCs found for this property and did not estimate - likely need to provide a" - "property type and built form" - ) - if is_new: db_funcs.property_functions.create_property_targets( session, @@ 
-690,35 +680,20 @@ async def model_engine(body: PlanTriggerRequest): uprn=epc_searcher.uprn, ) # Pull this out as it may get overwritten - property_non_invasive_recommendations = req_data.non_invasive_recommendations - patch = req_data.patch + property_non_invasive_recommendations, patch = req_data.non_invasive_recommendations, req_data.patch # if we have a remote assment data type, we pull the additional data and include it epc_page_source = {} if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): - try: - property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc, epc_page, rrn=rrn + property_non_invasive_recommendations, patch, epc_page_source = ( + RetrieveFindMyEpc.get_from_epc_with_fallback( + epc=epc_searcher.newest_epc, + epc_page=epc_page, + rrn=rrn, + cleaned_address=epc_searcher.address_clean, + config_address=config["address"] ) - except Exception as e: - logger.error(f"Failed to retrieve without cleaning address {e}") - try: - epc_to_use = deepcopy(epc_searcher.newest_epc) - for k in ["address", "address1"]: - epc_to_use[k] = epc_searcher.address_clean - property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( - epc_to_use, epc_page, rrn=rrn - ) - except Exception as e: - # Final attempt - logger.error(f"Failed to retrieve without cleaning address {e}") - epc_to_use = deepcopy(epc_searcher.newest_epc) - for k in ["address", "address1"]: - epc_to_use[k] = config["address"] - property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( - epc_to_use, epc_page, rrn=rrn - ) - # If we have a property type, this means when we pull the epc data, we might need to make a patch + ) epc_records = patch_epc(patch, epc_records) @@ -1190,6 +1165,8 @@ async def model_engine(body: PlanTriggerRequest): # when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we 
apply it to all # of them # TODO: We can probably do better and optimise at the building level - this is temp + # Idea: - optimise all measures except solar at the unit level. Then, test with and without solar for + # all units at the same time logger.info("Adjusting solar PV recommendations for buildings") building_ids = set([p.building_id for p in input_properties if p.building_id is not None]) diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 519c3e52..e28af4f5 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -1,7 +1,8 @@ import time import re -import pandas as pd import requests +import pandas as pd +from copy import deepcopy from bs4 import BeautifulSoup from datetime import datetime @@ -697,6 +698,7 @@ class RetrieveFindMyEpc: "Increase loft insulation to 250mm": ["loft_insulation"], "Solar photovoltaics panels, 25% of roof area": ["solar_pv"], 'Air or ground source heat pump': ["air_source_heat_pump"], + "Add PV Battery": ["solar_pv_battery"], } survey = True @@ -777,3 +779,44 @@ class RetrieveFindMyEpc: } return non_invasive_recommendations, patch, page_source + + @classmethod + def get_from_epc_with_fallback( + cls, epc, epc_page, rrn, cleaned_address=None, config_address=None + ): + """ + Attempt get_from_epc with: + 1) Original EPC + 2) EPC with cleaned address + 3) EPC with configured address + in that order. 
+ """ + + # The data we'll use to attempt retrieval + # 1) Original + attempts = [epc] + + # 2) Cleaned + if cleaned_address: + modified = deepcopy(epc) + for k in ["address", "address1"]: + modified[k] = cleaned_address + attempts.append(modified) + + # 3) Config address fallback + if config_address: + modified = deepcopy(epc) + for k in ["address", "address1"]: + modified[k] = config_address + attempts.append(modified) + + # Iterate attempts + last_error = None + for idx, attempt in enumerate(attempts, start=1): + try: + return cls.get_from_epc(attempt, epc_page, rrn=rrn) + except Exception as e: + last_error = e + logger.error(f"Attempt {idx} failed: {e}") + + raise RuntimeError(f"All EPC retrieval attempts failed: {last_error}") From 720720c4d0e638a2aded1b7f98ee76b6531c6f88 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 18:16:09 +0000 Subject: [PATCH 091/202] implemented some handling for mixed translation descriptions --- backend/Funding.py | 2 + backend/SearchEpc.py | 12 ++-- backend/app/assumptions.py | 1 + etl/epc/Dataset.py | 5 +- etl/epc_clean/app.py | 1 - .../epc_attributes/FloorAttributes.py | 4 ++ .../epc_attributes/HotWaterAttributes.py | 5 +- .../epc_attributes/LightingAttributes.py | 5 +- .../epc_attributes/MainFuelAttributes.py | 6 +- .../epc_attributes/MainheatAttributes.py | 20 ++++++- .../MainheatControlAttributes.py | 4 +- .../epc_attributes/RoofAttributes.py | 7 ++- .../epc_attributes/WallAttributes.py | 55 ++++++++++++++----- .../epc_attributes/WindowAttributes.py | 5 +- .../epc_attributes/attribute_utils.py | 16 ++++++ recommendations/recommendation_utils.py | 14 ++++- 16 files changed, 126 insertions(+), 36 deletions(-) diff --git a/backend/Funding.py b/backend/Funding.py index fe5165f6..4ec57f8a 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -368,6 +368,8 @@ class Funding: starting_str = "1.7" elif closest_starting == 1: starting_str = "1.0" + elif closest_starting == 0.6: + starting_str = "0.6" 
else: starting_str = f"{closest_starting:.2f}" diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 61c9cc30..6cb767e7 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -803,12 +803,13 @@ class SearchEpc: # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build # so we avoid comparing it to new builds - # TODO - this is experimental + # TODO - this is experimental - if we have the year the property was built, we should utilise that + # here newer_age_bands = [ "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011", "England and Wales: 2012 onwards" ] - + # We also remove EPCs that are for new dwellings if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum(): # We have some older age bands, so we need to filter them out epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy() @@ -975,9 +976,10 @@ class SearchEpc: # Before we return, we check if we need to overwrite a SAP05 EPC # If we have don't have SAP05 in the heating description and overwrite_sap05 is False, we return is_sap_o5 = "SAP05:" in self.newest_epc.get("mainheat-description", "") - if ( - (not is_sap_o5) and (not overwrite_sap05) and (response["status"] == 200) - ): + good_data = not is_sap_o5 and (response["status"] == 200) + + if good_data or not overwrite_sap05: + # If the data is fine, or we're preventing SAP05 overwrites, we just exit here return # By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 1c46a5c8..8edc03db 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -89,6 +89,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, "Air source heat pump, Warm air, electric": {"fuel": 
"Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 3fa84136..2e5646ac 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -454,10 +454,7 @@ class TrainingDataset(BaseDataset): lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1 ) floor_ending_uvalue = self.df.apply( - lambda row: self._lambda_function_to_generate_floor_uvalue( - row, is_end=True - ), - axis=1, + lambda row: self._lambda_function_to_generate_floor_uvalue(row, is_end=True), axis=1 ) floor_starting_uvalue = pd.to_numeric( diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 1f320a9b..99de1d03 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -48,7 +48,6 @@ def app(): data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold data = data[data["lodgement-date"] >= "2011-01-01"] - # Convert to list of dictioaries as returned by the api data = data.to_dict("records") diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 62767638..27f133cc 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_thermal_transmittance, extract_component_types, + handle_mixed_translation ) @@ -61,6 +62,9 @@ class FloorAttributes(Definitions): # Try and perform a translation, incase it's in welsh self.translate_welsh_text() + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata and not any( rt in self.description for rt in self.FLOOR_TYPES diff --git 
a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py index d1124e08..9966edea 100644 --- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py +++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation class HotWaterAttributes(Definitions): @@ -153,6 +153,9 @@ class HotWaterAttributes(Definitions): self.nodata = False self.description = translation + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata and not any( self._keyword_in_description(keywords) for keywords in [ diff --git a/etl/epc_clean/epc_attributes/LightingAttributes.py b/etl/epc_clean/epc_attributes/LightingAttributes.py index 78b31142..52cae764 100644 --- a/etl/epc_clean/epc_attributes/LightingAttributes.py +++ b/etl/epc_clean/epc_attributes/LightingAttributes.py @@ -1,6 +1,6 @@ import re from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation from etl.epc_clean.utils import correct_spelling @@ -25,6 +25,9 @@ class LightingAttributes(Definitions): self.description = correct_spelling(self.description) self.averages = averages + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or ( description in self.OBSERVED_ERRORS) or (description == "SAP05:Lighting") diff --git a/etl/epc_clean/epc_attributes/MainFuelAttributes.py b/etl/epc_clean/epc_attributes/MainFuelAttributes.py index 
9bb53ff1..a818a043 100644 --- a/etl/epc_clean/epc_attributes/MainFuelAttributes.py +++ b/etl/epc_clean/epc_attributes/MainFuelAttributes.py @@ -1,6 +1,8 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import ( + clean_description, remove_punctuation, find_keyword, handle_mixed_translation +) class MainFuelAttributes(Definitions): @@ -56,6 +58,8 @@ class MainFuelAttributes(Definitions): def __init__(self, description: str): self.description: str = remove_punctuation(clean_description(description.lower())) + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) self.is_community = 'community' in self.description and 'not community' not in self.description self.is_unknown = False diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index d20d9290..283c4724 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -1,5 +1,7 @@ from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, process_part, switch_chars +from etl.epc_clean.epc_attributes.attribute_utils import ( + clean_description, process_part, switch_chars, handle_mixed_translation +) from typing import Dict, Union @@ -77,7 +79,17 @@ class MainHeatAttributes(Definitions): 'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas', "bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, " "mains gas, Room heaters, " - "electric" + "electric", + # an unusual example, containing both english and welsh that was found in the data + "boiler and radiators, |bwyler a rheiddiaduron, |mains gas|nwy prif gyflenwad": "boiler and radiators, " + "mains gas", 
+ "room heaters, |gwresogyddion ystafell, |electric|trydan": "room heaters, electric", + "air source heat pump, |pwmp gwres sy'n tarddu yn yr awyr, |, radiators, |, rheiddiaduron, |electric|trydan": + "air source heat pump, radiators, electric", + "boiler and underfloor heating, |bwyler a gwres dan y llawr, |wood pellets|pelenni coed": "boiler and " + "underfloor " + "heating, " + "wood pellets", } REMAP = { @@ -95,6 +107,7 @@ class MainHeatAttributes(Definitions): "air sourceheat pump, radiators, electric": "air source heat pump, radiators, electric", "bwyler gyda rheiddiaduron a gwres dan y llawr, nwy prif gyflenwad": "Boiler and radiators, mains gas, " "Boiler and underfloor heating, mains gas", + } edge_case_result = {} @@ -115,6 +128,9 @@ class MainHeatAttributes(Definitions): self.nodata = False self.description = translation + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + remapped = [] for term in self.description.split(", "): remap = self.REMAP.get(term) diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py index 3e2df543..b9ef4eca 100644 --- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation class MainheatControlAttributes(Definitions): @@ -136,6 +136,8 @@ class MainheatControlAttributes(Definitions): def __init__(self, description: str): self.description: str = clean_description(description.lower()).strip() + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) self.nodata = not 
self.description or description in self.DATA_ANOMALY_MATCHES or ( description in self.NO_DATA_DESCRIPTIONS ) diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index 153fb548..98998e5a 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_component_types, extract_thermal_transmittance, + handle_mixed_translation ) @@ -79,6 +80,8 @@ class RoofAttributes(Definitions): """ self.description: str = description.lower().strip() + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) self.nodata = ( not description or description in self.DATA_ANOMALY_MATCHES @@ -90,8 +93,8 @@ class RoofAttributes(Definitions): if not self.nodata and not any( rt in self.description for rt in self.ROOF_TYPES - + self.DWELLING_ABOVE - + ["average thermal transmittance"] + + self.DWELLING_ABOVE + + ["average thermal transmittance"] ): raise ValueError("Invalid description") diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index 8cf32a0b..a390e0a5 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_component_types, extract_thermal_transmittance, + handle_mixed_translation ) @@ -23,33 +24,56 @@ class WallAttributes(Definitions): ] WELSH_TEXT = { - "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation (assumed)", - "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, partial insulation (assumed)", + "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio 
(rhagdybiaeth)": "Solid brick, as built, no insulation " + "(assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, " + "partial insulation (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation", - "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation" + " (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation", - "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, no insulation (assumed)", + "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, " + "no insulation (assumed)", "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation", "Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity", - "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, " + "insulated (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated", - "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, as built, no insulation (assumed)", + "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, " + "as built, no insulation (" + "assumed)", "Waliau ceudod,": "Cavity wall, as built, no insulation", - "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, insulated (assumed)", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu 
hinswleiddio (rhagdybiaeth)": "Timber frame, as built, " + "insulated (assumed)", "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated", "Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, as built, no insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, " + "as built, " + "no insulation (" + "assumed)", "Tywodfaen, gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation", "Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation", "Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation", - "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, partial insulation (assumed)", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System built, as built, insulated (assumed)", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, insulated", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, " + "partial insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System " + "built, " + "as built, " + "insulated (" + "assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, " + "insulated", "WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation", "Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol 
(rhagdybiaeth)": "System built, as built, partial insulation (assumed)", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, partial insulation", - "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, " + "as built, " + "partial " + "insulation (" + "assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, " + "partial insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation " + "(assumed)", "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation", "Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation", "Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation", @@ -92,6 +116,9 @@ class WallAttributes(Definitions): self.welsh_translation_search() + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + self.nodata = not description or description in self.DATA_ANOMALY_MATCHES def welsh_translation_search(self): diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py index 2b1dc172..f5edac2d 100644 --- a/etl/epc_clean/epc_attributes/WindowAttributes.py +++ b/etl/epc_clean/epc_attributes/WindowAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation class WindowAttributes(Definitions): @@ -53,6 +53,9 @@ class WindowAttributes(Definitions): self.nodata = False self.description = 
translation + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata: if not any( rt in self.description for rt in diff --git a/etl/epc_clean/epc_attributes/attribute_utils.py b/etl/epc_clean/epc_attributes/attribute_utils.py index 28f958a8..2e98d869 100644 --- a/etl/epc_clean/epc_attributes/attribute_utils.py +++ b/etl/epc_clean/epc_attributes/attribute_utils.py @@ -155,3 +155,19 @@ def find_keyword(description, keywords, synonyms=None): return synonyms.get(keyword, keyword) return None + + +def handle_mixed_translation(description): + """ + We handle some edge cases where welsh and english are mixed in the same description + :param description: str description to process + :return: + """ + if "|" not in description: + return description + + parts = description.split("|") + # The pattern that we see is that in index 0, we have english. Then welsh and then english again + # So, the even indexes are english + remapped_parts = [parts[i] for i in range(len(parts)) if i % 2 == 0] + return "".join(remapped_parts) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 7c39668a..6acc04f9 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -205,7 +205,7 @@ def get_wall_u_value( mapped_value = wall_uvalues_df[ wall_uvalues_df["Wall_type"] == mapped_description - ][age_band].values[0] + ][age_band].values[0] if pd.isnull(mapped_value) and "Park home" in mapped_description: # We don't know enough in this case so we default to 0 @@ -553,7 +553,15 @@ def get_floor_u_value( lambda_ins = 0.035 # thermal conductivity of floor insulation in W/m·K wall_thickness = [ x[age_band] for x in default_wall_thickness if x["type"] == wall_type - ][0] + ] + if not wall_thickness: + # In some cases, we may estimate an EPC and end up with a slightly mixed EPC, with some fields associated + # to a new build and others 
to an existing. So we might end up with a None wall type here, because of this. + # If this happens, nothing will be in the wall_thickness list so this is the fallback, the defauly thickness + # for many EPC assessment systems like Elmhurst + wall_thickness = 300 + else: + wall_thickness = wall_thickness[0] if wall_thickness is None and wall_type == "park home": # We don't know enough and likely won't make recommendations return 0 @@ -563,7 +571,7 @@ def get_floor_u_value( insulation_lookup = s11[ s11["Age_band"].str.contains(age_band) & s11["Floor_construction"] == floor_type - ] + ] if insulation_lookup.empty: insulation_thickness = 0 else: From 5a9e018b1989cbd36db41a3b7044414b958d2a32 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 18:57:13 +0000 Subject: [PATCH 092/202] fixed ashp non invasic rec bug --- .../db/functions/recommendations_functions.py | 215 ++++++++++++++---- backend/engine/engine.py | 2 +- recommendations/HeatingRecommender.py | 4 +- 3 files changed, 174 insertions(+), 47 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 8c6e710a..2d444544 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,3 +1,4 @@ +from tqdm import tqdm from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError @@ -169,62 +170,188 @@ def upload_recommendations(session: Session, recommendations_to_upload, property return False -def clear_portfolio(session: Session, portfolio_id: int): - # Fetch all property IDs associated with the given portfolio - property_ids = session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id).all() - property_ids = [p.id for p in property_ids] +# def clear_portfolio(session: Session, portfolio_id: int): +# # Fetch all property IDs associated with the given portfolio +# 
property_ids = session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id).all() +# property_ids = [p.id for p in property_ids] +# +# # Fetch all recommendation IDs associated with the properties +# recommendation_ids = session.query(Recommendation.id).filter(Recommendation.property_id.in_(property_ids)).all() +# recommendation_ids = [r.id for r in recommendation_ids] +# +# # Fetch all plan IDs associated with the portfolio +# plan_ids = session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).all() +# plan_ids = [p.id for p in plan_ids] +# +# # Delete all entries from RecommendationMaterials for these recommendations +# session.execute( +# delete(RecommendationMaterials).where(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) +# ) +# +# # Delete all entries from PlanRecommendations that reference plans in the portfolio +# session.execute(delete(PlanRecommendations).where(PlanRecommendations.plan_id.in_( +# session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).subquery().as_scalar() +# ))) +# +# # Delete FundingPackageMeasures → FundingPackage → Plan +# session.execute( +# delete(FundingPackageMeasures).where(FundingPackageMeasures.funding_package_id.in_( +# session.query(FundingPackage.id).filter(FundingPackage.plan_id.in_(plan_ids)) +# )) +# ) +# session.execute( +# delete(FundingPackage).where(FundingPackage.plan_id.in_(plan_ids)) +# ) +# +# # Delete all Plans associated with the portfolio +# session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id)) +# +# # Delete all Scenarios associated with the portfolio +# session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) +# +# # Delete all Recommendations associated with the properties +# session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids))) +# +# session.execute( +# delete(InspectionModel) +# .where(InspectionModel.property_id.in_( +# 
session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id) +# )) +# .execution_options(synchronize_session=False) +# ) +# +# # Now, delete the PropertyModels and related details +# # Delete PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel, and PropertyModel +# session.execute(delete(PropertyTargetsModel).where(PropertyTargetsModel.portfolio_id == portfolio_id)) +# # session.execute(delete(PropertyDetailsMeter).where(PropertyDetailsMeter.uprn.in_(property_ids))) +# session.execute(delete(PropertyDetailsEpcModel).where(PropertyDetailsEpcModel.portfolio_id == portfolio_id)) +# session.execute(delete(PropertyModel).where(PropertyModel.portfolio_id == portfolio_id)) +# +# # Commit the changes +# session.commit() - # Fetch all recommendation IDs associated with the properties - recommendation_ids = session.query(Recommendation.id).filter(Recommendation.property_id.in_(property_ids)).all() - recommendation_ids = [r.id for r in recommendation_ids] - # Fetch all plan IDs associated with the portfolio - plan_ids = session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).all() - plan_ids = [p.id for p in plan_ids] +def chunked(iterable, size=500): + for i in range(0, len(iterable), size): + yield iterable[i:i + size] - # Delete all entries from RecommendationMaterials for these recommendations - session.execute( - delete(RecommendationMaterials).where(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) - ) - # Delete all entries from PlanRecommendations that reference plans in the portfolio - session.execute(delete(PlanRecommendations).where(PlanRecommendations.plan_id.in_( - session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).subquery().as_scalar() - ))) +def clear_portfolio(session: Session, portfolio_id: int, batch_size=500): + # -------------------------- + # Collect IDs up-front + # -------------------------- + property_ids = [ + p.id for p in session.query(PropertyModel.id) + 
.filter(PropertyModel.portfolio_id == portfolio_id) + ] - # Delete FundingPackageMeasures → FundingPackage → Plan - session.execute( - delete(FundingPackageMeasures).where(FundingPackageMeasures.funding_package_id.in_( - session.query(FundingPackage.id).filter(FundingPackage.plan_id.in_(plan_ids)) - )) - ) - session.execute( - delete(FundingPackage).where(FundingPackage.plan_id.in_(plan_ids)) - ) + recommendation_ids = [ + r.id for r in session.query(Recommendation.id) + .filter(Recommendation.property_id.in_(property_ids)) + ] - # Delete all Plans associated with the portfolio - session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id)) + plan_ids = [ + p.id for p in session.query(Plan.id) + .filter(Plan.portfolio_id == portfolio_id) + ] - # Delete all Scenarios associated with the portfolio + funding_package_ids = [ + fp.id for fp in session.query(FundingPackage.id) + .filter(FundingPackage.plan_id.in_(plan_ids)) + ] + + # -------------------------- + # Batch deletes with tqdm + # -------------------------- + + # RecommendationMaterials + for chunk in tqdm(chunked(recommendation_ids, batch_size), + total=(len(recommendation_ids) // batch_size) + 1, + desc="Deleting RecommendationMaterials"): + session.execute( + delete(RecommendationMaterials) + .where(RecommendationMaterials.recommendation_id.in_(chunk)) + ) + + # PlanRecommendations + for chunk in tqdm(chunked(plan_ids, batch_size), + total=(len(plan_ids) // batch_size) + 1, + desc="Deleting PlanRecommendations"): + session.execute( + delete(PlanRecommendations) + .where(PlanRecommendations.plan_id.in_(chunk)) + ) + + # FundingPackageMeasures + for chunk in tqdm(chunked(funding_package_ids, batch_size), + total=(len(funding_package_ids) // batch_size) + 1, + desc="Deleting FundingPackageMeasures"): + session.execute( + delete(FundingPackageMeasures) + .where(FundingPackageMeasures.funding_package_id.in_(chunk)) + ) + + # FundingPackage + for chunk in tqdm(chunked(plan_ids, batch_size), + 
total=(len(plan_ids) // batch_size) + 1, + desc="Deleting FundingPackages"): + session.execute( + delete(FundingPackage) + .where(FundingPackage.plan_id.in_(chunk)) + ) + + # Plans + for chunk in tqdm(chunked(plan_ids, batch_size), + total=(len(plan_ids) // batch_size) + 1, + desc="Deleting Plans"): + session.execute( + delete(Plan) + .where(Plan.id.in_(chunk)) + ) + + # Scenarios (no chunks needed) + tqdm.write("Deleting Scenarios…") session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) - # Delete all Recommendations associated with the properties - session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids))) + # Recommendations + for chunk in tqdm(chunked(property_ids, batch_size), + total=(len(property_ids) // batch_size) + 1, + desc="Deleting Recommendations"): + session.execute( + delete(Recommendation) + .where(Recommendation.property_id.in_(chunk)) + ) + # Inspections + for chunk in tqdm(chunked(property_ids, batch_size), + total=(len(property_ids) // batch_size) + 1, + desc="Deleting Inspections"): + session.execute( + delete(InspectionModel) + .where(InspectionModel.property_id.in_(chunk)) + ) + + # Property-related detail tables + tqdm.write("Deleting PropertyTargetsModel…") session.execute( - delete(InspectionModel) - .where(InspectionModel.property_id.in_( - session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id) - )) - .execution_options(synchronize_session=False) + delete(PropertyTargetsModel) + .where(PropertyTargetsModel.portfolio_id == portfolio_id) ) - # Now, delete the PropertyModels and related details - # Delete PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel, and PropertyModel - session.execute(delete(PropertyTargetsModel).where(PropertyTargetsModel.portfolio_id == portfolio_id)) - # session.execute(delete(PropertyDetailsMeter).where(PropertyDetailsMeter.uprn.in_(property_ids))) - 
session.execute(delete(PropertyDetailsEpcModel).where(PropertyDetailsEpcModel.portfolio_id == portfolio_id)) - session.execute(delete(PropertyModel).where(PropertyModel.portfolio_id == portfolio_id)) + tqdm.write("Deleting PropertyDetailsEpcModel…") + session.execute( + delete(PropertyDetailsEpcModel) + .where(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + ) + + # Properties + for chunk in tqdm(chunked(property_ids, batch_size), + total=(len(property_ids) // batch_size) + 1, + desc="Deleting Properties"): + session.execute( + delete(PropertyModel) + .where(PropertyModel.id.in_(chunk)) + ) - # Commit the changes session.commit() + tqdm.write("Portfolio cleared.") diff --git a/backend/engine/engine.py b/backend/engine/engine.py index c0261e57..bf533117 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -612,7 +612,7 @@ async def model_engine(body: PlanTriggerRequest): address1 = config.get("domna_address_1", None) address1 = str(int(address1)) if isinstance(address1, float) else str(address1) - full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None + full_address = config.get("domna_full_address") if body.file_format == "domna_asset_list" else None heating_system = parse_heating_system(config) associated_uprns = [] diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index fdd4376d..c5aa8b38 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -321,7 +321,7 @@ class HeatingRecommender: # if we have a non-invasive ashp recommendation, we get the configuration directly from the property instance non_invasive_ashp_recommendation = next( (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"), - {"suitable": True} + {"survey": False} ) # We allow for the non-invasive recommendation to be that ASHP is not suitable @@ -366,7 +366,7 @@ class HeatingRecommender: if ( 
self.property.is_ashp_valid(measures=measures) and - non_invasive_ashp_recommendation["suitable"] and + len(non_invasive_ashp_recommendation) and not self.has_ashp and not self.has_gshp ): self.recommend_air_source_heat_pump( From 4f6a9fdc07afc9f2adbaee6b0ee77294f9c8a711 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 18:59:16 +0000 Subject: [PATCH 093/202] minor on batch portfolio delete --- backend/app/db/functions/recommendations_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 2d444544..7905be89 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -231,12 +231,12 @@ def upload_recommendations(session: Session, recommendations_to_upload, property # session.commit() -def chunked(iterable, size=500): +def chunked(iterable, size=100): for i in range(0, len(iterable), size): yield iterable[i:i + size] -def clear_portfolio(session: Session, portfolio_id: int, batch_size=500): +def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): # -------------------------- # Collect IDs up-front # -------------------------- From 852420a8fae627e43b17e9c95faec6699d9748b0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 20:12:50 +0000 Subject: [PATCH 094/202] handling more cases --- backend/SearchEpc.py | 17 ++++-- .../db/functions/recommendations_functions.py | 61 ------------------- backend/engine/engine.py | 3 +- .../epc_attributes/FloorAttributes.py | 10 +++ .../test_data/test_floor_attributes_cases.py | 8 ++- etl/find_my_epc/RetrieveFindMyEpc.py | 51 ++++++---------- 6 files changed, 50 insertions(+), 100 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 6cb767e7..686843c3 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -208,6 +208,7 @@ class 
SearchEpc: # These are the address and postcode values, which we store in the database self.address_clean = None self.postcode_clean = None + self.address_postal_town = None self.size = size if size is not None else 25 @@ -490,7 +491,11 @@ class SearchEpc: postcode = postcode.upper() - return address, postcode + # We also return a "postal town variant - useful for edge cases when fetching from find my EPC + address_postal_town = ", ".join( + [newest_epc["address1"], newest_epc["address2"], newest_epc["posttown"]]).strip().title() + + return address, postcode, address_postal_town def extract_epc_data(self, address=None): @@ -545,9 +550,9 @@ class SearchEpc: return newest_epc, [], {}, "", "", None # Retrieve postcode and address - address_epc, postcode_epc = self.format_address(newest_epc=newest_epc) + address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc) - return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn + return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn, address_postal_town @staticmethod def filter_newest_epc(list_of_epcs: List): @@ -970,7 +975,8 @@ class SearchEpc: if response["status"] == 200: ( - self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn + self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn, + self.address_postal_town ) = self.extract_epc_data(address=self.full_address) # Before we return, we check if we need to overwrite a SAP05 EPC @@ -1032,7 +1038,8 @@ class SearchEpc: response = self.get_epc() if response["status"] == 200: ( - self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn + self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn, + self.address_postal_town ) = self.extract_epc_data() return diff --git 
a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 7905be89..48ed3de3 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -170,67 +170,6 @@ def upload_recommendations(session: Session, recommendations_to_upload, property return False -# def clear_portfolio(session: Session, portfolio_id: int): -# # Fetch all property IDs associated with the given portfolio -# property_ids = session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id).all() -# property_ids = [p.id for p in property_ids] -# -# # Fetch all recommendation IDs associated with the properties -# recommendation_ids = session.query(Recommendation.id).filter(Recommendation.property_id.in_(property_ids)).all() -# recommendation_ids = [r.id for r in recommendation_ids] -# -# # Fetch all plan IDs associated with the portfolio -# plan_ids = session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).all() -# plan_ids = [p.id for p in plan_ids] -# -# # Delete all entries from RecommendationMaterials for these recommendations -# session.execute( -# delete(RecommendationMaterials).where(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) -# ) -# -# # Delete all entries from PlanRecommendations that reference plans in the portfolio -# session.execute(delete(PlanRecommendations).where(PlanRecommendations.plan_id.in_( -# session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).subquery().as_scalar() -# ))) -# -# # Delete FundingPackageMeasures → FundingPackage → Plan -# session.execute( -# delete(FundingPackageMeasures).where(FundingPackageMeasures.funding_package_id.in_( -# session.query(FundingPackage.id).filter(FundingPackage.plan_id.in_(plan_ids)) -# )) -# ) -# session.execute( -# delete(FundingPackage).where(FundingPackage.plan_id.in_(plan_ids)) -# ) -# -# # Delete all Plans associated with the portfolio -# 
session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id)) -# -# # Delete all Scenarios associated with the portfolio -# session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) -# -# # Delete all Recommendations associated with the properties -# session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids))) -# -# session.execute( -# delete(InspectionModel) -# .where(InspectionModel.property_id.in_( -# session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id) -# )) -# .execution_options(synchronize_session=False) -# ) -# -# # Now, delete the PropertyModels and related details -# # Delete PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel, and PropertyModel -# session.execute(delete(PropertyTargetsModel).where(PropertyTargetsModel.portfolio_id == portfolio_id)) -# # session.execute(delete(PropertyDetailsMeter).where(PropertyDetailsMeter.uprn.in_(property_ids))) -# session.execute(delete(PropertyDetailsEpcModel).where(PropertyDetailsEpcModel.portfolio_id == portfolio_id)) -# session.execute(delete(PropertyModel).where(PropertyModel.portfolio_id == portfolio_id)) -# -# # Commit the changes -# session.commit() - - def chunked(iterable, size=100): for i in range(0, len(iterable), size): yield iterable[i:i + size] diff --git a/backend/engine/engine.py b/backend/engine/engine.py index bf533117..688c1379 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -691,7 +691,8 @@ async def model_engine(body: PlanTriggerRequest): epc_page=epc_page, rrn=rrn, cleaned_address=epc_searcher.address_clean, - config_address=config["address"] + config_address=config["address"], + address_postal_town=epc_searcher.address_postal_town ) ) diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 27f133cc..47013aaa 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ 
b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -48,6 +48,12 @@ class FloorAttributes(Definitions): "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)", } + REMAP = { + # Have only seen this once - though perhaps need to investigate older EPCs in the production of EPC clean. + # When looking at a newer EPC, which had been re-assessed as another dwelling below + "above unheated space or full exposed": "(another dwelling below)", + } + def __init__(self, description: str): self.description: str = description.lower() @@ -62,6 +68,10 @@ class FloorAttributes(Definitions): # Try and perform a translation, incase it's in welsh self.translate_welsh_text() + # Remap known issues + if self.description in self.REMAP: + self.description = self.REMAP[self.description] + # We handle seemind occurances of mixed translations self.description = handle_mixed_translation(self.description) diff --git a/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py b/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py index 81ec7a32..080f59be 100644 --- a/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py @@ -375,6 +375,12 @@ clean_floor_cases = [ 'thermal_transmittance_unit': 'w/m-¦k', 'is_assumed': False, 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'another_property_below': False, 'insulation_thickness': None + }, + { + # This example gets remapped to another dwelling below + "description": "Above unheated space or full exposed", + 'thermal_transmittance': 0, 'thermal_transmittance_unit': 'w/m-¦k', 'is_assumed': False, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, + 'another_property_below': True, 'insulation_thickness': None } - ] diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index e28af4f5..a7767273 
100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -22,7 +22,7 @@ class RetrieveFindMyEpc: 'Chrome/111.0.0.0 Safari/537.36' } - def __init__(self, address: str, postcode: str, rrn: str = None): + def __init__(self, address: str, postcode: str, rrn: str = None, address_postal_town: str = ""): """ This class is tasked with retrieving the latest EPC data from the find my epc website :param address: The address of the property @@ -36,6 +36,10 @@ class RetrieveFindMyEpc: self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower() self.walls = [] + self.address_postal_town = address_postal_town + if self.address_postal_town: + self.address_postal_town = self.address_postal_town.replace(",", "").replace(" ", "").lower() + @staticmethod def extract_low_carbon_sources(soup): # Find the section header @@ -363,7 +367,12 @@ class RetrieveFindMyEpc: extracted_address.replace(",", "").replace(" ", "").lower() ) - if not extracted_address_cleaned.startswith(self.address_cleaned): + no_primary_match = not extracted_address_cleaned.startswith(self.address_cleaned) + no_backup_match = True if not self.address_postal_town else not ( + extracted_address_cleaned.startswith(self.address_postal_town) + ) + + if no_primary_match and no_backup_match: continue # If the address is a match, we can extract the data @@ -394,7 +403,9 @@ class RetrieveFindMyEpc: return chosen_epc, epc_certificate - def retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None): + def retrieve_newest_find_my_epc_data( + self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None + ): """ For a post code and address, we pull out all the required data from the find my epc website """ @@ -725,37 +736,13 @@ class RetrieveFindMyEpc: return formatted_recommendations @classmethod - def get_from_epc(cls, epc, epc_page_source=None, rrn=None): + def get_from_epc(cls, epc, epc_page_source=None, 
rrn=None, address_postal_town=None): if epc_page_source is not None and rrn is None: raise ValueError("rrn must be provided if epc_page_source is provided") - # Attempt both methods: - try: - searcher = cls(address=epc["address"], postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) - except Exception as e: - logger.error(f"Error retrieving find my epc data: {e}") - - # We try two backup approaches. The first is to trim the final section off the end of the address - address1 = ",".join(epc["address"].split(",")[:-1]) - try: - searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) - logger.info("Successfully retrieved find my epc data using trimmed address") - except Exception as e2: - logger.error(f"Error retrieving find my epc data using trimmed address: {e2}") - # Attempt final approach - - if epc["address1"] == epc["address"]: - # There's no benefit of using the same address, so we split on comma - address1 = epc["address"].split(",")[0] - else: - address1 = epc["address1"] - # We attempt with the backup add - searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) - logger.info("Successfully retrieved find my epc data using backup address") + searcher = cls(address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town) + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) non_invasive_recommendations = { "uprn": epc["uprn"], @@ -782,7 +769,7 @@ class RetrieveFindMyEpc: @classmethod def get_from_epc_with_fallback( - cls, epc, epc_page, rrn, cleaned_address=None, config_address=None + cls, epc, epc_page, rrn, cleaned_address=None, config_address=None, address_postal_town=None ): """ Attempt get_from_epc 
with: @@ -814,7 +801,7 @@ class RetrieveFindMyEpc: last_error = None for idx, attempt in enumerate(attempts, start=1): try: - return cls.get_from_epc(attempt, epc_page, rrn=rrn) + return cls.get_from_epc(attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town) except Exception as e: last_error = e logger.error(f"Attempt {idx} failed: {e}") From 110f461d49bb4e48e050e15daabecb6f7ccf3f49 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 20:40:07 +0000 Subject: [PATCH 095/202] handling the case of there not being any suitable recommendations for a property --- backend/engine/engine.py | 65 +++++++++++-------- .../optimiser/funding_optimiser.py | 8 +++ 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 688c1379..93ddc085 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1043,38 +1043,47 @@ async def model_engine(body: PlanTriggerRequest): work_package=eco_packages[p.id][2] ) - # If the solution isn't eligible, we can't really consider it - solutions = solutions[ - (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") - ] - - if solutions["meets_upgrade_target"].any(): - # If we have a solution that meets the upgrade target, we select that one - optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] + # if handle the empty case + if solutions.empty: + scheme = "none" + funded_measures, solution = [], [] + ( + project_funding, total_uplift, full_project_score, partial_project_score, uplift_project_score + ) = 0, 0, 0, 0, 0 else: - # Pick the cheapest - optimal_solution = solutions.iloc[0] - # This is the list of measures that we will recommend - scheme = optimal_solution["scheme"] + # If the solution isn't eligible, we can't really consider it + solutions = solutions[ + (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") + ] - # We create this 
full list of selected measures, which is used in the next section for setting - # default measures - solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) - funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] + if solutions["meets_upgrade_target"].any(): + # If we have a solution that meets the upgrade target, we select that one + optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] + else: + # Pick the cheapest + optimal_solution = solutions.iloc[0] - # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) - project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ - optimal_solution["partial_project_funding"] - # This is the total amount of funding associated to the uplift (£) - total_uplift = optimal_solution["total_uplift"] - # This is the funding scheme selected - # This is the full project ABS - full_project_score = optimal_solution["project_score"] - # This is the partial project ABS - partial_project_score = optimal_solution["partial_project_score"] - # This is the uplift score ABS - uplift_project_score = optimal_solution["total_uplift_score"] + # This is the list of measures that we will recommend + scheme = optimal_solution["scheme"] + + # We create this full list of selected measures, which is used in the next section for setting + # default measures + solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) + funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] + + # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) + project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ + optimal_solution["partial_project_funding"] + # This is the total amount of funding associated to the uplift (£) + total_uplift = optimal_solution["total_uplift"] + # This is the funding scheme 
selected + # This is the full project ABS + full_project_score = optimal_solution["project_score"] + # This is the partial project ABS + partial_project_score = optimal_solution["partial_project_score"] + # This is the uplift score ABS + uplift_project_score = optimal_solution["total_uplift_score"] else: # We optimise and then we determine eligibility for funding, based on the measures selected optimiser = ( diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 855d7e5c..925a818f 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -502,6 +502,10 @@ def optimise_with_funding_paths( solutions = pd.DataFrame(solutions) + if solutions.empty: + # We return a blank dataframe + return solutions + # Given the scheme, we now check if the packages are eligible. If they *are* eligible, but they don't meet the # final upgrade target, we then look to perform a final optimisation pass to meet the target gain. solutions["meets_upgrade_target"] = solutions["total_gain"] >= target_gain - 0.1 @@ -779,6 +783,10 @@ def run_optimizer(input_measures, budget=None, sub_target_gain=None, allow_slack Thin wrapper over your optimisers. 
Returns: list[dict] selected_options """ + + if not input_measures: + return None, 0.0, 0.0 + if budget is not None: opt = GainOptimiser( input_measures, max_cost=budget, max_gain=(sub_target_gain or float("inf")), From 52e99f179b87fce98df9e3b10598618fb4d33983 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 21:30:53 +0000 Subject: [PATCH 096/202] handling property with sap score of 217 --- backend/apis/GoogleSolarApi.py | 4 ++-- backend/engine/engine.py | 2 -- backend/ml_models/AnnualBillSavings.py | 14 +++++++------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 00fb98d8..a0970ac5 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -704,7 +704,7 @@ class GoogleSolarApi: # We set the target rating to EPC C, which is the typical EPC rating we would expect the # property to achieve post retrofit of just the fabric "energy_consumption": cls.estimate_new_consumption( - current_energy_efficiency=p.data["current-energy-efficiency"], + current_energy_efficiency=min(p.data["current-energy-efficiency"], 100), target_efficiency="69", current_consumption=p.estimate_electrical_consumption( assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions @@ -723,7 +723,7 @@ class GoogleSolarApi: # We set the target rating to EPC C, which is the typical EPC rating we would expect the # property to achieve post retrofit of just the fabric "energy_consumption": cls.estimate_new_consumption( - current_energy_efficiency=p.data["current-energy-efficiency"], + current_energy_efficiency=min(p.data["current-energy-efficiency"], 100), target_efficiency="69", current_consumption=p.estimate_electrical_consumption( assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 93ddc085..215adfe4 100644 --- 
a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -472,8 +472,6 @@ async def model_engine(body: PlanTriggerRequest): created_at = datetime.now().isoformat() start_ms = int(time.time() * 1000) - # TODO: if the measure is already installed, it should actually be the very first phase - try: session.begin() logger.info("Getting the inputs") diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 4a2a6f1f..569d43c1 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -26,21 +26,21 @@ class AnnualBillSavings: AVERAGE_ELECTRICITY_CONSUMPTION = 2700 AVERAGE_GAS_CONSUMPTION = 11500 - # Latest price cap figures from Ofgem are for April 2024 + # Latest price cap figures from Ofgem are for Jan 2026 to March 2026 # https://www.ofgem.gov.uk/energy-price-cap - ELECTRICITY_PRICE_CAP = 0.2573 - GAS_PRICE_CAP = 0.0633 - # This is the most recent export payment figure, at 9.28p/kWh + ELECTRICITY_PRICE_CAP = 0.2769 + GAS_PRICE_CAP = 0.593 + # This is the most recent export payment figure, at 13p/kWh - Updated Nov 2025 # Smart export guarantee rates can be found here: # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates - ELECTRICITY_EXPORT_PAYMENT = 0.0928 + ELECTRICITY_EXPORT_PAYMENT = 0.13 # This is a weighted mean of the price caps, using the consumption figures above as weights PRICE_FACTOR = 0.09549999999999999 # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT - DAILY_STANDARD_CHARGE_GAS = 0.2982 - DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137 + DAILY_STANDARD_CHARGE_GAS = 0.3509 + DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5475 # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison # For July 2024. 
These quotes are based on the east midlands region, so we From ce75f5e9b438f29275be3d8b98230009d45c8977 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 21:32:31 +0000 Subject: [PATCH 097/202] covering another heating system type --- backend/app/assumptions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 8edc03db..31acbe29 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -90,6 +90,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1}, + "Community scheme with CHP, mains gas": {"fuel": "Natural Gas", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it From b84dc3a6a88309156f0b5fedd8224edee50206b0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 21:40:10 +0000 Subject: [PATCH 098/202] put in some placeholder code for fast deletes --- .../db/functions/recommendations_functions.py | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 48ed3de3..0a3b7164 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,5 +1,5 @@ from tqdm import tqdm -from sqlalchemy import insert, delete +from sqlalchemy import insert, delete, text from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( @@ -175,6 +175,34 @@ def chunked(iterable, size=100): yield iterable[i:i + size] +def fast_delete_recommendations(session, chunk): + values = 
",".join(f"({pid})" for pid in chunk) + + sql = text(f""" + WITH ids(property_id) AS ( + VALUES {values} + ) + DELETE FROM recommendation r + USING ids + WHERE r.property_id = ids.property_id; + """) + + session.execute(sql) + + # Note; we may be able to go even faster like this: + # def delete_with_temp_table(session, chunk): + # session.execute(text("CREATE TEMP TABLE tmp_ids (id bigint) ON COMMIT DROP;")) + # + # insert_sql = "INSERT INTO tmp_ids (id) VALUES " + ",".join(f"({i})" for i in chunk) + # session.execute(text(insert_sql)) + # + # session.execute(text(""" + # DELETE FROM recommendation r + # USING tmp_ids t + # WHERE r.property_id = t.id; + # """)) + + def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): # -------------------------- # Collect IDs up-front @@ -252,14 +280,11 @@ def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): tqdm.write("Deleting Scenarios…") session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) - # Recommendations + # Recommendations - fast delete for chunk in tqdm(chunked(property_ids, batch_size), total=(len(property_ids) // batch_size) + 1, desc="Deleting Recommendations"): - session.execute( - delete(Recommendation) - .where(Recommendation.property_id.in_(chunk)) - ) + fast_delete_recommendations(session, chunk) # Inspections for chunk in tqdm(chunked(property_ids, batch_size), From 6c8b65f3fb49b2a2fd1cbe56d49e7a1f32018a8d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 22:01:03 +0000 Subject: [PATCH 099/202] removing tqdm --- .../db/functions/recommendations_functions.py | 130 +++++++++++------- 1 file changed, 84 insertions(+), 46 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 0a3b7164..51ff59c5 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -176,18 +176,18 
@@ def chunked(iterable, size=100): def fast_delete_recommendations(session, chunk): - values = ",".join(f"({pid})" for pid in chunk) + placeholders = ",".join([f"({i})" for i in range(len(chunk))]) sql = text(f""" - WITH ids(property_id) AS ( - VALUES {values} - ) - DELETE FROM recommendation r - USING ids - WHERE r.property_id = ids.property_id; - """) + WITH ids(property_id) AS ( + VALUES {placeholders} + ) + DELETE FROM recommendation r + USING ids + WHERE r.property_id = ids.property_id; + """) - session.execute(sql) + session.execute(sql, execution_options={"synchronize_session": False}) # Note; we may be able to go even faster like this: # def delete_with_temp_table(session, chunk): @@ -204,6 +204,9 @@ def fast_delete_recommendations(session, chunk): def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): + def print_progress(prefix, i, total): + print(f"{prefix} ({i}/{total})") + # -------------------------- # Collect IDs up-front # -------------------------- @@ -227,95 +230,130 @@ def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): .filter(FundingPackage.plan_id.in_(plan_ids)) ] - # -------------------------- - # Batch deletes with tqdm - # -------------------------- + # ========== BATCH HELPERS ========== + def chunked(lst, n): + for i in range(0, len(lst), n): + yield lst[i:i + n] - # RecommendationMaterials - for chunk in tqdm(chunked(recommendation_ids, batch_size), - total=(len(recommendation_ids) // batch_size) + 1, - desc="Deleting RecommendationMaterials"): + # -------------------------- + # Deleting RecommendationMaterials + # -------------------------- + rm_chunks = list(chunked(recommendation_ids, batch_size)) + total = len(rm_chunks) + for i, chunk in enumerate(rm_chunks, start=1): + print_progress("Deleting RecommendationMaterials", i, total) session.execute( delete(RecommendationMaterials) .where(RecommendationMaterials.recommendation_id.in_(chunk)) ) + # -------------------------- # PlanRecommendations 
- for chunk in tqdm(chunked(plan_ids, batch_size), - total=(len(plan_ids) // batch_size) + 1, - desc="Deleting PlanRecommendations"): + # -------------------------- + pr_chunks = list(chunked(plan_ids, batch_size)) + total = len(pr_chunks) + for i, chunk in enumerate(pr_chunks, start=1): + print_progress("Deleting PlanRecommendations", i, total) session.execute( delete(PlanRecommendations) .where(PlanRecommendations.plan_id.in_(chunk)) ) + # -------------------------- # FundingPackageMeasures - for chunk in tqdm(chunked(funding_package_ids, batch_size), - total=(len(funding_package_ids) // batch_size) + 1, - desc="Deleting FundingPackageMeasures"): + # -------------------------- + fpm_chunks = list(chunked(funding_package_ids, batch_size)) + total = len(fpm_chunks) + for i, chunk in enumerate(fpm_chunks, start=1): + print_progress("Deleting FundingPackageMeasures", i, total) session.execute( delete(FundingPackageMeasures) .where(FundingPackageMeasures.funding_package_id.in_(chunk)) ) - # FundingPackage - for chunk in tqdm(chunked(plan_ids, batch_size), - total=(len(plan_ids) // batch_size) + 1, - desc="Deleting FundingPackages"): + # -------------------------- + # FundingPackages + # -------------------------- + fp_chunks = list(chunked(plan_ids, batch_size)) + total = len(fp_chunks) + for i, chunk in enumerate(fp_chunks, start=1): + print_progress("Deleting FundingPackages", i, total) session.execute( delete(FundingPackage) .where(FundingPackage.plan_id.in_(chunk)) ) + # -------------------------- # Plans - for chunk in tqdm(chunked(plan_ids, batch_size), - total=(len(plan_ids) // batch_size) + 1, - desc="Deleting Plans"): + # -------------------------- + plan_chunks = list(chunked(plan_ids, batch_size)) + total = len(plan_chunks) + for i, chunk in enumerate(plan_chunks, start=1): + print_progress("Deleting Plans", i, total) session.execute( delete(Plan) .where(Plan.id.in_(chunk)) ) - # Scenarios (no chunks needed) - tqdm.write("Deleting Scenarios…") - 
session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) + # -------------------------- + # Scenarios + # -------------------------- + print("Deleting Scenarios…") + session.execute( + delete(Scenario) + .where(Scenario.portfolio_id == portfolio_id) + ) - # Recommendations - fast delete - for chunk in tqdm(chunked(property_ids, batch_size), - total=(len(property_ids) // batch_size) + 1, - desc="Deleting Recommendations"): + # -------------------------- + # Recommendations (fast delete) + # -------------------------- + rec_chunks = list(chunked(property_ids, batch_size)) + total = len(rec_chunks) + for i, chunk in enumerate(rec_chunks, start=1): + print_progress("Deleting Recommendations", i, total) fast_delete_recommendations(session, chunk) + # -------------------------- # Inspections - for chunk in tqdm(chunked(property_ids, batch_size), - total=(len(property_ids) // batch_size) + 1, - desc="Deleting Inspections"): + # -------------------------- + insp_chunks = list(chunked(property_ids, batch_size)) + total = len(insp_chunks) + for i, chunk in enumerate(insp_chunks, start=1): + print_progress("Deleting Inspections", i, total) session.execute( delete(InspectionModel) .where(InspectionModel.property_id.in_(chunk)) ) - # Property-related detail tables - tqdm.write("Deleting PropertyTargetsModel…") + # -------------------------- + # PropertyTargetsModel + # -------------------------- + print("Deleting PropertyTargetsModel…") session.execute( delete(PropertyTargetsModel) .where(PropertyTargetsModel.portfolio_id == portfolio_id) ) - tqdm.write("Deleting PropertyDetailsEpcModel…") + # -------------------------- + # PropertyDetailsEpcModel + # -------------------------- + print("Deleting PropertyDetailsEpcModel…") session.execute( delete(PropertyDetailsEpcModel) .where(PropertyDetailsEpcModel.portfolio_id == portfolio_id) ) + # -------------------------- # Properties - for chunk in tqdm(chunked(property_ids, batch_size), - total=(len(property_ids) 
// batch_size) + 1, - desc="Deleting Properties"): + # -------------------------- + prop_chunks = list(chunked(property_ids, batch_size)) + total = len(prop_chunks) + for i, chunk in enumerate(prop_chunks, start=1): + print_progress("Deleting Properties", i, total) session.execute( delete(PropertyModel) .where(PropertyModel.id.in_(chunk)) ) session.commit() - tqdm.write("Portfolio cleared.") + print("Portfolio cleared.") From f75c630dd1132aae493cc0955aa5f20dd102922c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 30 Nov 2025 22:14:48 +0000 Subject: [PATCH 100/202] actually removed tqdm --- backend/app/db/functions/recommendations_functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 51ff59c5..9d01f288 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,4 +1,3 @@ -from tqdm import tqdm from sqlalchemy import insert, delete, text from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError From 9e7ed1efd5b5cbc99a536023e8f7401b1f227c45 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 1 Dec 2025 09:35:07 +0000 Subject: [PATCH 101/202] making epc searching more specific --- backend/SearchEpc.py | 14 +++++++-- etl/find_my_epc/RetrieveFindMyEpc.py | 43 ++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 686843c3..5ceac5f9 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -447,11 +447,19 @@ class SearchEpc: ] elif best_match1[1] > best_match2[1]: - # Get all of the scores - rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]] + # Get all of the scores - make sure we keep uprn + rows_filtered = [ + r for r in rows if + ( + (", ".join([r["address"], r["posttown"]]) == 
best_match1[0]) or + (str(r["uprn"]) == str(self.uprn)) + ) + ] else: # Get all of the scores - rows_filtered = [r for r in rows if r["address"] == best_match2[0]] + rows_filtered = [ + r for r in rows if (r["address"] == best_match2[0]) or (str(r["uprn"]) == str(self.uprn)) + ] # If we have multiple, we filter on newest lodgment date if len(rows_filtered) > 1: diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index a7767273..5bb0e89c 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -22,7 +22,9 @@ class RetrieveFindMyEpc: 'Chrome/111.0.0.0 Safari/537.36' } - def __init__(self, address: str, postcode: str, rrn: str = None, address_postal_town: str = ""): + def __init__( + self, address: str, postcode: str, rrn: str = None, address_postal_town: str = "", sap_rating: int = None + ): """ This class is tasked with retrieving the latest EPC data from the find my epc website :param address: The address of the property @@ -40,6 +42,8 @@ class RetrieveFindMyEpc: if self.address_postal_town: self.address_postal_town = self.address_postal_town.replace(",", "").replace(" ", "").lower() + self.sap_rating = sap_rating + @staticmethod def extract_low_carbon_sources(soup): # Find the section header @@ -351,7 +355,7 @@ class RetrieveFindMyEpc: postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") rows = postcode_res.find_all('tr', class_='govuk-table__row') - extracted_table = [] + extracted_table, backup_flat = [], [] for row in rows: # Extract the address and URL address_tag = row.find('a', class_='govuk-link') @@ -373,6 +377,17 @@ class RetrieveFindMyEpc: ) if no_primary_match and no_backup_match: + if self.address_cleaned.startswith("flat"): + # We have a flat address, so we can try and match without the flat number + flat_removed_address = self.address_cleaned[4:] + if extracted_address_cleaned.startswith(flat_removed_address): + # We have a backup match + 
backup_flat.append( + { + "extracted_address": extracted_address, + "extracted_address_url": extracted_address_url, + } + ) continue # If the address is a match, we can extract the data @@ -391,9 +406,12 @@ class RetrieveFindMyEpc: } ) - if not extracted_table: + if not extracted_table and not backup_flat: raise ValueError("No EPC found") + if not extracted_table: + extracted_table = deepcopy(backup_flat) + if len(extracted_table) > 1: # We take the one with the most recent expiry date extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) @@ -439,6 +457,12 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) + if current_sap != self.sap_rating: + raise ValueError( + f"SAP rating mismatch: expected {self.sap_rating}, got {current_sap} for address {self.address}, " + f"postcode {self.postcode}" + ) + # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) bills_list = bills.find_all('li') @@ -736,12 +760,15 @@ class RetrieveFindMyEpc: return formatted_recommendations @classmethod - def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None): + def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None): if epc_page_source is not None and rrn is None: raise ValueError("rrn must be provided if epc_page_source is provided") - searcher = cls(address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town) + searcher = cls( + address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town, + sap_rating=sap_rating + ) find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) non_invasive_recommendations = { @@ -797,11 +824,15 @@ class RetrieveFindMyEpc: modified[k] = config_address attempts.append(modified) + sap_rating = float(epc["current-energy-efficiency"]) + # Iterate attempts 
last_error = None for idx, attempt in enumerate(attempts, start=1): try: - return cls.get_from_epc(attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town) + return cls.get_from_epc( + attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating + ) except Exception as e: last_error = e logger.error(f"Attempt {idx} failed: {e}") From 753ef66f4891b0070d549f32c1f91b4bb70e720f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 1 Dec 2025 13:40:07 +0000 Subject: [PATCH 102/202] allowing us not to retrieve find my epc data, when we very clearly don't have the right epc --- backend/apis/GoogleSolarApi.py | 2 +- .../db/functions/recommendations_functions.py | 32 ++++++----------- backend/app/plan/utils.py | 2 +- etl/find_my_epc/RetrieveFindMyEpc.py | 35 ++++++++++++------- 4 files changed, 34 insertions(+), 37 deletions(-) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index a0970ac5..f7aa311f 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -723,7 +723,7 @@ class GoogleSolarApi: # We set the target rating to EPC C, which is the typical EPC rating we would expect the # property to achieve post retrofit of just the fabric "energy_consumption": cls.estimate_new_consumption( - current_energy_efficiency=min(p.data["current-energy-efficiency"], 100), + current_energy_efficiency=min(int(p.data["current-energy-efficiency"]), 100), target_efficiency="69", current_consumption=p.estimate_electrical_consumption( assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 9d01f288..55bf5824 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -175,31 +175,19 @@ def chunked(iterable, size=100): def fast_delete_recommendations(session, 
chunk): - placeholders = ",".join([f"({i})" for i in range(len(chunk))]) + placeholders = ",".join(["(:p{})".format(i) for i in range(len(chunk))]) + params = {f"p{i}": chunk[i] for i in range(len(chunk))} sql = text(f""" - WITH ids(property_id) AS ( - VALUES {placeholders} - ) - DELETE FROM recommendation r - USING ids - WHERE r.property_id = ids.property_id; - """) + WITH ids(property_id) AS ( + VALUES {placeholders} + ) + DELETE FROM recommendation r + USING ids + WHERE r.property_id = ids.property_id; + """) - session.execute(sql, execution_options={"synchronize_session": False}) - - # Note; we may be able to go even faster like this: - # def delete_with_temp_table(session, chunk): - # session.execute(text("CREATE TEMP TABLE tmp_ids (id bigint) ON COMMIT DROP;")) - # - # insert_sql = "INSERT INTO tmp_ids (id) VALUES " + ",".join(f"({i})" for i in chunk) - # session.execute(text(insert_sql)) - # - # session.execute(text(""" - # DELETE FROM recommendation r - # USING tmp_ids t - # WHERE r.property_id = t.id; - # """)) + session.execute(sql, params, execution_options={"synchronize_session": False}) def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index d8c54963..ebf1dd9c 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -43,7 +43,7 @@ def patch_epc(patch, epc_records): if patch_variable in ["address", "postcode"]: continue - if patch_value == "": + if patch_value in ["", None]: continue if patch_variable in epc_records["original_epc"]: epc_records["original_epc"][patch_variable] = patch_value diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 5bb0e89c..2e23c7e6 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -407,7 +407,10 @@ class RetrieveFindMyEpc: ) if not extracted_table and not backup_flat: - raise ValueError("No EPC found") + # This is a relatively new 
change, as of November 2025, but we see cases where properties do not + # have data appearing on the find my EPC website, particularly for older EPCs. In this case, we allo + # for us to not find any information and return nothing + return None, None if not extracted_table: extracted_table = deepcopy(backup_flat) @@ -430,6 +433,11 @@ class RetrieveFindMyEpc: if epc_page_source is None and rrn is None: chosen_epc, rrn = self._find_epc_page() + if chosen_epc is None: + # We have no resulting data + logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode) + return {} + address_response = requests.get(chosen_epc, headers=self.HEADERS) epc_page_source = address_response.text address_res = BeautifulSoup(address_response.text, features="html.parser") @@ -458,10 +466,11 @@ class RetrieveFindMyEpc: current_sap = int(current_rating.split(' ')[-1]) if current_sap != self.sap_rating: - raise ValueError( - f"SAP rating mismatch: expected {self.sap_rating}, got {current_sap} for address {self.address}, " - f"postcode {self.postcode}" - ) + # This means we likely have the wrong data. 
If we are in this scenario, we return nothing + return { + "epc_certificate": None, + "page_source": None, + } # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) @@ -775,21 +784,21 @@ class RetrieveFindMyEpc: "uprn": epc["uprn"], "address": epc["address"], "postcode": epc["postcode"], - "recommendations": find_epc_data["recommendations"], + "recommendations": find_epc_data.get("recommendations", []), } # We need to add the patch information patch = { - "current-energy-rating": find_epc_data["current_epc_rating"], - "current-energy-efficiency": find_epc_data["current_epc_efficiency"], - "potential-energy-rating": find_epc_data["potential_epc_rating"], - "potential-energy-efficiency": find_epc_data["potential_epc_efficiency"], - **find_epc_data["epc_data"], + "current-energy-rating": find_epc_data.get("current_epc_rating"), + "current-energy-efficiency": find_epc_data.get("current_epc_efficiency"), + "potential-energy-rating": find_epc_data.get("potential_epc_rating"), + "potential-energy-efficiency": find_epc_data.get("potential_epc_efficiency"), + **find_epc_data.get("epc_data", {}), } page_source = { - "rrn": find_epc_data["epc_certificate"], - "page_source": find_epc_data["page_source"] + "rrn": find_epc_data.get("epc_certificate"), + "page_source": find_epc_data.get("page_source") } return non_invasive_recommendations, patch, page_source From 4e07049edb50a3569a8fc91816570c2cf44df13a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 1 Dec 2025 16:54:18 +0000 Subject: [PATCH 103/202] debugging bigger peabody portfolio - setting default floor type with warning --- backend/Property.py | 7 ++++++- backend/app/plan/router.py | 4 +++- .../Nov 2025 Consulting Project/data_prep.py | 20 +++++++++---------- etl/find_my_epc/RetrieveFindMyEpc.py | 1 + 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index bc5660e8..dd92a902 100644 --- a/backend/Property.py +++ 
b/backend/Property.py @@ -1078,7 +1078,12 @@ class Property: elif self.floor["thermal_transmittance"] is not None: self.floor_type = "solid" else: - raise NotImplementedError("Implement this floor type") + # in this case, it's not super clear what the floor type is, so we default - this is a temp + logger.warning( + f"Could not determine floor type, given: '{self.floor['original_description']}', defaulting to " + f"suspended for property {self.uprn}" + ) + self.floor_type = "suspended" @staticmethod def _extract_component( diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 9216eac0..3ec1bf5f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -120,7 +120,9 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): f"Chunk {i} sent to SQS. Rows {index_start}–{index_end}. Message ID: {response.get('MessageId')}" ) - await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure + await asyncio.sleep(0.1) # Small delay to avoid SQS throttling + + # await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure except Exception as e: logger.error("Error during Excel file handling: %s", e) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index e05d82e4..76871b39 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -210,9 +210,9 @@ archetypes = sustainability_data[archetype_variables + ["UPRN"]].dropna().groupb archetypes["Cumulative Count"] = archetypes["Count"].cumsum() archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum() -archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80] -archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1 -archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + 
archetypes_85["Archetypes_85_reference"].astype(str) +archetypes_95 = archetypes[archetypes["Cumulative Proportion"] <= 0.97] +archetypes_95["Archetypes_95_reference"] = archetypes_95.index + 1 +archetypes_95["Archetypes_95_reference"] = "Archetype_Sample_" + archetypes_95["Archetypes_95_reference"].astype(str) # For the sample, look for invalid looking UPRNs and remove them. sample_from = sustainability_data.copy() @@ -227,13 +227,13 @@ sample_from = sample_from[~pd.isnull(sample_from["UPRN"])] # We now take a sample of the properties that represent 85% of the total properties sample_from = sample_from.merge( - archetypes_85, + archetypes_95, on=archetype_variables, how="inner" ) -# We take 1 random property, by archetype 85 reference -modelling_sample = sample_from.groupby("Archetypes_85_reference").apply( +# We take 1 random property, by archetype reference +modelling_sample = sample_from.groupby("Archetypes_95_reference").apply( lambda x: x.sample(1, random_state=42) ).reset_index(drop=True) @@ -248,7 +248,7 @@ def compare_distributions(full_df, sample_df, column): for col in archetype_variables: print(f"--- {col} ---") - print(compare_distributions(sustainability_data, modelling_sample, col)) + print(compare_distributions(sustainability_data, sample_from, col)) # prepare modelling_sample["domna_property_id"] = modelling_sample.index + 1 @@ -263,7 +263,7 @@ modelling_sample = modelling_sample.rename( } ) -modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map( +modelling_sample["landlord_built_form"] = modelling_sample["landlord_built_form"].map( { "MidTerrace": "Mid-Terrace", "EndTerrace": "End-Terrace", @@ -274,7 +274,7 @@ modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map( } ) -if pd.isnull(modelling_sample["domna_built_form"]).sum(): +if pd.isnull(modelling_sample["landlord_built_form"]).sum(): raise ValueError("Some built forms are null after mapping") @@ -292,7 +292,7 @@ modelling_sample.to_excel( 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx", ) # Save the archetype definitions -archetypes_85.to_excel( +archetypes_95.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/archetypes_85.xlsx", ) # Save the full archetypes diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 2e23c7e6..e0370725 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -684,6 +684,7 @@ class RetrieveFindMyEpc: "Heating controls recommendation": [], "Replace boiler with Band A condensing boiler": ["boiler_upgrade"], "Band A condensing gas boiler": ["boiler_upgrade"], + "Install Band A condensing heating unit": ["boiler_upgrade"], "Solar panel recommendation": [], "Double glazing recommendation": [], "Solid wall insulation recommendation": [], From 08908659f6d9f6d93669009b5dc409660b74bf67 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 1 Dec 2025 20:28:32 +0000 Subject: [PATCH 104/202] making plan api timeout longer and dispatch messages faster --- backend/app/plan/router.py | 2 +- serverless.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 3ec1bf5f..e6e6052f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -120,7 +120,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): f"Chunk {i} sent to SQS. Rows {index_start}–{index_end}. 
Message ID: {response.get('MessageId')}" ) - await asyncio.sleep(0.1) # Small delay to avoid SQS throttling + await asyncio.sleep(0.05) # Small delay to avoid SQS throttling # await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure diff --git a/serverless.yml b/serverless.yml index 6eea03eb..7e4d78b6 100644 --- a/serverless.yml +++ b/serverless.yml @@ -48,7 +48,7 @@ functions: fastapi-backend: handler: backend.app.main.handler - timeout: 30 + timeout: 60 memorySize: 512 role: FastApiLambdaRole events: From b5ccdc41875216243d57716f20380628b82aeda5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 2 Dec 2025 10:49:18 +0000 Subject: [PATCH 105/202] corrected gas fuel price --- backend/ml_models/AnnualBillSavings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 569d43c1..27d1b5be 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -29,7 +29,7 @@ class AnnualBillSavings: # Latest price cap figures from Ofgem are for Jan 2026 to March 2026 # https://www.ofgem.gov.uk/energy-price-cap ELECTRICITY_PRICE_CAP = 0.2769 - GAS_PRICE_CAP = 0.593 + GAS_PRICE_CAP = 0.0593 # This is the most recent export payment figure, at 13p/kWh - Updated Nov 2025 # Smart export guarantee rates can be found here: # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates From 0876e948c9f8c496e0b2aedfafa000468dad7fb8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Dec 2025 22:07:52 +0000 Subject: [PATCH 106/202] increase timeout of engine dispatcher --- .../Nov 2025 Consulting Project/data_prep.py | 187 +++++++++--------- serverless.yml | 2 +- 2 files changed, 95 insertions(+), 94 deletions(-) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index 76871b39..4dd07ee5 100644 
--- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -74,125 +74,125 @@ sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area ( # Roof insulation category # 1) Split roof insulation into > 100mm loft and <= 100mm loft sustainability_data["Roof Insulation Category"] = sustainability_data["Roof Insulation"].copy() -sustainability_data["Roof Insulation Category"] = np.where( - sustainability_data["Roof Insulation Category"].isin( - ['mm200', 'mm300', 'mm250', 'mm150', 'mm270', 'mm400', 'mm350'], - ), - "LI > 100mm", - sustainability_data["Roof Insulation Category"], -) +# sustainability_data["Roof Insulation Category"] = np.where( +# sustainability_data["Roof Insulation Category"].isin( +# ['mm200', 'mm300', 'mm250', 'mm150', 'mm270', 'mm400', 'mm350'], +# ), +# "LI > 100mm", +# sustainability_data["Roof Insulation Category"], +# ) -sustainability_data["Roof Insulation Category"] = np.where( - sustainability_data["Roof Insulation Category"].isin( - ['mm100', 'mm50', 'mm75', 'mm25'], - ), - "LI <= 100mm", - sustainability_data["Roof Insulation Category"], -) +# sustainability_data["Roof Insulation Category"] = np.where( +# sustainability_data["Roof Insulation Category"].isin( +# ['mm100', 'mm50', 'mm75', 'mm25'], +# ), +# "LI <= 100mm", +# sustainability_data["Roof Insulation Category"], +# ) # 2) Group all of the glazed together (e.g. 
double glazed, secondary glazed, triple glazed) sustainability_data["Glazing Type"] = sustainability_data["Glazing"].copy() -sustainability_data["Glazing Type"] = np.where( - sustainability_data["Glazing Type"].isin( - ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData'] - ), - "Double Glazed", - sustainability_data["Glazing Type"], -) -sustainability_data["Glazing Type"] = np.where( - sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']), - "Triple Glazed", - sustainability_data["Glazing Type"], -) +# sustainability_data["Glazing Type"] = np.where( +# sustainability_data["Glazing Type"].isin( +# ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData'] +# ), +# "Double Glazed", +# sustainability_data["Glazing Type"], +# ) +# sustainability_data["Glazing Type"] = np.where( +# sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']), +# "Triple Glazed", +# sustainability_data["Glazing Type"], +# ) # 3) Group up boiler efficiency A, B-D, E - G? 
or someting like this sustainability_data["Boiler Efficiency Group"] = sustainability_data["Boiler Efficiency"].copy() -sustainability_data["Boiler Efficiency Group"] = np.where( - sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']), - "B-D", - sustainability_data["Boiler Efficiency Group"], -) -sustainability_data["Boiler Efficiency Group"] = np.where( - sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']), - "E-G", - sustainability_data["Boiler Efficiency Group"], -) +# sustainability_data["Boiler Efficiency Group"] = np.where( +# sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']), +# "B-D", +# sustainability_data["Boiler Efficiency Group"], +# ) +# sustainability_data["Boiler Efficiency Group"] = np.where( +# sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']), +# "E-G", +# sustainability_data["Boiler Efficiency Group"], +# ) # 4) Group up main fuel into gas, electric, oil, other? sustainability_data["Main Fuel Group"] = sustainability_data["Main Fuel"].copy() -sustainability_data["Main Fuel Group"] = np.where( - sustainability_data["Main Fuel Group"].isin( - ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"] - ), - "Other Fuel", - sustainability_data["Main Fuel Group"], -) +# sustainability_data["Main Fuel Group"] = np.where( +# sustainability_data["Main Fuel Group"].isin( +# ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"] +# ), +# "Other Fuel", +# sustainability_data["Main Fuel Group"], +# ) # 5) Wall Construction - group up Sandstone and Granite into one category -sustainability_data["Wall Construction"] = np.where( - sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]), - "Sandstone/Granite", - sustainability_data["Wall Construction"] -) +# sustainability_data["Wall Construction"] = np.where( +# sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]), +# "Sandstone/Granite", +# sustainability_data["Wall Construction"] +# ) 
-sustainability_data["Wall Construction"] = np.where( - sustainability_data["Wall Construction"].isin(["Timber Frame", "System", "Solid Brick"]), - "Solid", - sustainability_data["Wall Construction"] -) +# sustainability_data["Wall Construction"] = np.where( +# sustainability_data["Wall Construction"].isin(["Timber Frame", "System"]), +# "Timber/System", +# sustainability_data["Wall Construction"] +# ) # 6) Reduce or remove floor construction -sustainability_data["Floor Construction"] = np.where( - sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]), - "Suspended Floor", - sustainability_data["Floor Construction"] -) +# sustainability_data["Floor Construction"] = np.where( +# sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]), +# "Suspended Floor", +# sustainability_data["Floor Construction"] +# ) # 7) Reduce wall insulation -sustainability_data["Wall Insulation"] = np.where( - sustainability_data["Wall Insulation"].isin( - ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"] - ), - "Insulated", - sustainability_data["Wall Insulation"] -) +# sustainability_data["Wall Insulation"] = np.where( +# sustainability_data["Wall Insulation"].isin( +# ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"] +# ), +# "Insulated", +# sustainability_data["Wall Insulation"] +# ) # 8) Fill floor insulation sustainability_data["Floor Insulation"] = sustainability_data["Floor Insulation"].fillna("Unknown") # 9) Reduce Age bands -sustainability_data["Construction Years"] = np.where( - sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]), - "2003 onwards", - sustainability_data["Construction Years"], -) +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]), +# "2003 onwards", +# 
sustainability_data["Construction Years"], +# ) -sustainability_data["Construction Years"] = np.where( - sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]), - "Before 1929", - sustainability_data["Construction Years"], -) +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]), +# "Before 1929", +# sustainability_data["Construction Years"], +# ) -sustainability_data["Construction Years"] = np.where( - sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]), - "1983-1995", - sustainability_data["Construction Years"], -) +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]), +# "1983-1995", +# sustainability_data["Construction Years"], +# ) -sustainability_data["Construction Years"] = np.where( - sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]), - "1950-1982", - sustainability_data["Construction Years"], -) +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]), +# "1950-1982", +# sustainability_data["Construction Years"], +# ) # Roof -sustainability_data["Roof Construction"] = np.where( - sustainability_data["Roof Construction"].isin( - ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"] - ), - "Pitched Roof", - sustainability_data["Roof Construction"] -) +# sustainability_data["Roof Construction"] = np.where( +# sustainability_data["Roof Construction"].isin( +# ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"] +# ), +# "Pitched Roof", +# sustainability_data["Roof Construction"] +# ) archetype_variables = [ "Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", @@ -210,7 +210,7 @@ archetypes = 
sustainability_data[archetype_variables + ["UPRN"]].dropna().groupb archetypes["Cumulative Count"] = archetypes["Count"].cumsum() archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum() -archetypes_95 = archetypes[archetypes["Cumulative Proportion"] <= 0.97] +archetypes_95 = archetypes.copy() archetypes_95["Archetypes_95_reference"] = archetypes_95.index + 1 archetypes_95["Archetypes_95_reference"] = "Archetype_Sample_" + archetypes_95["Archetypes_95_reference"].astype(str) @@ -290,6 +290,7 @@ modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_f # Save this CSV as input modelling_sample.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx", + sheet_name="Standardised Asset List" ) # Save the archetype definitions archetypes_95.to_excel( diff --git a/serverless.yml b/serverless.yml index 7e4d78b6..d2d8f50a 100644 --- a/serverless.yml +++ b/serverless.yml @@ -48,7 +48,7 @@ functions: fastapi-backend: handler: backend.app.main.handler - timeout: 60 + timeout: 600 memorySize: 512 role: FastApiLambdaRole events: From bdc4c213ad1925093e4744153a1d2254cd9fede0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Dec 2025 09:40:24 +0000 Subject: [PATCH 107/202] working on cleaning epc data for old records --- backend/Property.py | 11 ++++--- backend/app/assumptions.py | 9 ++++++ backend/app/db/models/recommendations.py | 11 +++++++ backend/engine/engine.py | 39 ++++++++++++++++++++++-- 4 files changed, 62 insertions(+), 8 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index dd92a902..50fc865e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -727,11 +727,12 @@ class Property: self.energy_cost_estimates = { "unadjusted": unadjusted_heating_costs, - "epc": { - "heating": float(self.data["heating-cost-current"]), - "hot_water": float(self.data["hot-water-cost-current"]), - "lighting": 
float(self.data["lighting-cost-current"]), - } + # Don't think we need the EPC + # "epc": { + # "heating": float(self.data["heating-cost-current"]), + # "hot_water": float(self.data["hot-water-cost-current"]), + # "lighting": float(self.data["lighting-cost-current"]), + # } } self.energy_consumption_estimates = { diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 31acbe29..898f586b 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -101,3 +101,12 @@ measures_needing_ventilation = [ # If we have a property beyond this size, we assume it's likely large enough to have an ASHP ASHP_FLOOR_AREA_THRESHOLD = 120 # m2 + +# Is a placeholder, used for cleaning data. Is a flat average based on the estimated +AVERAGE_LIGHTING_COST = 100 + +# Average bill, based on british gas is #1,838.71. Subtract 100 for lighting, 228 for hot water. This will include +# appliances so appliances should be removed when this is used +AVERAGE_HEATING_AND_APPLIANCE_COST = 1510.71 +# Based on https://energysavingtrust.org.uk/sites/default/files/reports/AtHomewithWater%287%29.pdf +AVERAGE_HOT_WATER_COST = 228 diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 2b7bf7c7..4c02268d 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -3,6 +3,7 @@ from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func from backend.app.db.models.portfolio import Portfolio, PropertyModel from backend.app.db.models.materials import Material +from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits import enum @@ -78,6 +79,16 @@ class Plan(Base): ), nullable=True, ) + post_sap_points = Column(Float) + post_epc_rating = Column(Enum(Epc)) + post_co2_emissions = Column(Float) + co2_savings = Column(Float) + post_energy_bill = Column(Float) + energy_bill_savings = Column(Float) + post_energy_consumption = Column(Float) # 
energy demand in kWh/year + energy_consumption_savings = Column(Float) + valuation_post_retrofit = Column(Float) + valuation_increase = Column(Float) class PlanRecommendations(Base): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 215adfe4..f92da01a 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1,4 +1,3 @@ -import os import time import json from copy import deepcopy @@ -16,6 +15,7 @@ from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response +from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine @@ -415,8 +415,17 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): :return: """ - if not pd.isnull(prepared_epc.prepared_epc["number_habitable_rooms"]) and not pd.isnull( - prepared_epc.prepared_epc["number_heated_rooms"]) and not pd.isnull(prepared_epc.prepared_epc["floor_height"]): + variables_to_clean = [ + "number_habitable_rooms", + "number_heated_rooms", + "floor_height", + "lighting_cost_current", + "heating_cost_current", + "hot_water_cost_current", + "energy_consumption_potential", + ] + + if not any([pd.isnull(prepared_epc.prepared_epc[k]) for k in variables_to_clean]): # Nothing to do return prepared_epc @@ -461,6 +470,30 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): prepared_epc.prepared_epc["floor_height"] = clean_floor_height prepared_epc.floor_height = clean_floor_height + if pd.isnull(prepared_epc.lighting_cost_current): + # This is a basic assumption as an average + prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST + prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST + + if pd.isnull(prepared_epc.heating_cost_current): + # This is a basic 
assumption as an average + appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( + total_floor_area=prepared_epc.total_floor_area + ) * AnnualBillSavings.ELECTRICITY_PRICE_CAP + heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost + prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value + prepared_epc.heating_cost_current = heating_cleaned_value + + if pd.isnull(prepared_epc.hot_water_cost_current): + # This is a basic assumption as an average + prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST + prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST + + if pd.isnull(prepared_epc.energy_consumption_potential): + # Set to current + prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current + prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current + return prepared_epc From c10bf032dc9936084efd3000d6ed173a9581353a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Dec 2025 14:59:47 +0000 Subject: [PATCH 108/202] debugging fuel code --- backend/Property.py | 6 ++ backend/SearchEpc.py | 61 +++++++++++++--- .../db/functions/recommendations_functions.py | 62 ++++++++++++++++ backend/engine/engine.py | 71 ++++++++----------- 4 files changed, 147 insertions(+), 53 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 50fc865e..6328ac8c 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1220,6 +1220,12 @@ class Property: else: self.heating_energy_source = ['Electricity'] + if set(self.heating_energy_source) == {'Electricity', 'LPG'}: + if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]: + self.heating_energy_source = ['LPG'] + else: + self.heating_energy_source = ['Electricity'] + if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}: # It means they have mixed heating so we take the primary one, 
based on main fuel # This will probably happen in the case of an extension diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 5ceac5f9..a193a65f 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -144,6 +144,11 @@ class SearchEpc: "error": None } + # Keys that we check for missing values to determine if the EPC is incomplete + CHECK_MISSING_KEYS = [ + "lighting-cost-current", "heating-cost-current", "hot-water-cost-current", "energy-consumption-potential" + ] + def __init__( self, address1: str, @@ -217,6 +222,9 @@ class SearchEpc: # By default, this is set to false. This flag indicates whether we should overwrite SAP 2005 entires. self.overwrite_sap05 = False + # Be default, this is set to false. This flag indicates whether we should take the existing EPC, but use + # the estimated EPC to clean missings + self.clean_missing_on_expired = False def set_strict_property_type_search(self): """ @@ -988,20 +996,40 @@ class SearchEpc: ) = self.extract_epc_data(address=self.full_address) # Before we return, we check if we need to overwrite a SAP05 EPC - # If we have don't have SAP05 in the heating description and overwrite_sap05 is False, we return - is_sap_o5 = "SAP05:" in self.newest_epc.get("mainheat-description", "") - good_data = not is_sap_o5 and (response["status"] == 200) + # ---- SAP 05 overwriting logic ---- + is_sap_05 = "SAP05:" in self.newest_epc.get("mainheat-description", "") - if good_data or not overwrite_sap05: + needs_sap_05_overwrite = is_sap_05 and (response["status"] == 200) and overwrite_sap05 + + # ---- Cleaning expired EPC logic ---- + epc_is_expired = (pd.Timestamp.now() - pd.Timestamp( + self.newest_epc.get("lodgement-date", pd.Timestamp.now()))).days > 3650 + + epc_has_missing_key_data = any([self.newest_epc.get(k) in [None, ""] for k in self.CHECK_MISSING_KEYS]) + + epc_needs_cleaning = epc_is_expired and epc_has_missing_key_data + + # ---- We don't have an epc ---- + no_epc = response["status"] != 200 + + # If we don't 
have to overwrite SAP05, or we don't have missing data on an expired EPC, we return + if not needs_sap_05_overwrite and not epc_needs_cleaning and not no_epc: # If the data is fine, or we're preventing SAP05 overwrites, we just exit here return # By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC lmks_to_drop, exclude_old = [], False - if is_sap_o5: - self.overwrite_sap05 = True + if needs_sap_05_overwrite or epc_needs_cleaning: + self.overwrite_sap05 = needs_sap_05_overwrite + self.clean_missing_on_expired = epc_needs_cleaning lmks_to_drop = [self.newest_epc["lmk-key"]] exclude_old = True + self.heating_system = ( + self.newest_epc["mainheat-description"] if + self.clean_missing_on_expired and self.heating_system is None else self.heating_system + ) + self.ordnance_survey_client.property_type = self.newest_epc["property-type"] + self.ordnance_survey_client.built_form = self.newest_epc["built-form"] # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn if skip_os: @@ -1016,13 +1044,24 @@ class SearchEpc: exclude_old=exclude_old ) + # If we have overwritten a SAP05 EPC, we need to update older_epcs too if self.overwrite_sap05: # We keep a record of the fact that we have performed a SAP05 overwrite estimated_epc["sap_05_overwritten"] = True + self.older_epcs = [self.newest_epc.copy()] + self.newest_epc = estimated_epc + elif self.clean_missing_on_expired: + # We perform the cleaning + for k in self.CHECK_MISSING_KEYS: + if self.newest_epc[k] in ["", None]: + self.newest_epc[k] = estimated_epc[k] + + self.newest_epc["estimated"] = True + self.older_epcs = [] + else: + self.older_epcs = [] + self.newest_epc = estimated_epc - # If we have overwritten a SAP05 EPC, we need to update older_epcs too - self.older_epcs = [] if not self.overwrite_sap05 else [self.newest_epc.copy()] - self.newest_epc = estimated_epc self.full_sap_epc = {} # Finally, set a standardised address 1 and postcode @@ -1077,7 
+1116,9 @@ class SearchEpc: if not self.newest_epc: raise ValueError("No EPC data available to set UPRN source - run find_property first") - if self.newest_epc.get("estimated") and file_format == "domna_asset_list" and (self.newest_epc["uprn"] < 0): + if (self.newest_epc.get("estimated") and + (file_format == "domna_asset_list") and + (float(self.newest_epc["uprn"]) < 0)): self.newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED def check_attribute_variations(self): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 55bf5824..542dde93 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -11,6 +11,68 @@ from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage from backend.app.db.models.inspections import InspectionModel +def prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations +): + """ + Utility function to prepare the data that goes into the production of a plan. 
Is a fairly rough and unstructured + function that will need improving in the future + :param p: Instantiated property + :param body: request body, PlanTriggerRequest + :param scenario_id: unique identifier for the scenario + :param eco_packages: Pre-constructed eco packages for a property + :param valuations: valuation improvement data + :param new_sap_points: sap points, post default recommendations + :param new_epc: new epc rating, post default recommendations + :param default_recommendations: list of default recommendations for a property + :return: + """ + # Plan carbon savings + co2_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) + post_co2_emissions = p.data["co2-emissions-current"] - co2_savings + + # Plan bill savings + energy_bill_savings = sum([r["energy_cost_savings"] for r in default_recommendations]) + post_energy_bill = sum(p.current_energy_bill.values()) - energy_bill_savings + + # energy consumption + energy_consumption_savings = sum([r["kwh_savings"] for r in default_recommendations]) + post_energy_consumption = p.current_energy_consumption - energy_consumption_savings + + valuation_post_retrofit, valuation_increase = None, None + if valuations["current_value"]: + valuation_increase = valuations["average_increase"] + valuation_post_retrofit = valuations["average_increased_value"] + + return { + "portfolio_id": body.portfolio_id, + "property_id": p.id, + "scenario_id": scenario_id, + "is_default": True if p.is_new else False, + "name": body.scenario_name, + "valuation_increase_lower_bound": ( + valuations["lower_bound_increased_value"] - valuations["current_value"] + ), + "valuation_increase_upper_bound": ( + valuations["upper_bound_increased_value"] - valuations["current_value"] + ), + "valuation_increase_average": ( + valuations["average_increased_value"] - valuations["current_value"] + ), + "post_sap_points": new_sap_points, + "post_epc_rating": new_epc, + "post_co2_emissions": post_co2_emissions, + "co2_savings": 
co2_savings, + "post_energy_bill": post_energy_bill, + "energy_bill_savings": energy_bill_savings, + "post_energy_consumption": post_energy_consumption, + "energy_consumption_savings": energy_consumption_savings, + "valuation_post_retrofit": valuation_post_retrofit, + "valuation_increase": valuation_increase, + "plan_type": eco_packages.get(p.id, (None, None, None))[2] + } + + def create_plan(session: Session, plan): """ This function will create a record for the plan in the database if it does not exist. diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f92da01a..2427ca8a 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -419,10 +419,6 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): "number_habitable_rooms", "number_heated_rooms", "floor_height", - "lighting_cost_current", - "heating_cost_current", - "hot_water_cost_current", - "energy_consumption_potential", ] if not any([pd.isnull(prepared_epc.prepared_epc[k]) for k in variables_to_clean]): @@ -470,29 +466,29 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): prepared_epc.prepared_epc["floor_height"] = clean_floor_height prepared_epc.floor_height = clean_floor_height - if pd.isnull(prepared_epc.lighting_cost_current): - # This is a basic assumption as an average - prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST - prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST + # if pd.isnull(prepared_epc.lighting_cost_current): + # # This is a basic assumption as an average + # prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST + # prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST - if pd.isnull(prepared_epc.heating_cost_current): - # This is a basic assumption as an average - appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( - total_floor_area=prepared_epc.total_floor_area - ) * 
AnnualBillSavings.ELECTRICITY_PRICE_CAP - heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost - prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value - prepared_epc.heating_cost_current = heating_cleaned_value - - if pd.isnull(prepared_epc.hot_water_cost_current): - # This is a basic assumption as an average - prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST - prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST - - if pd.isnull(prepared_epc.energy_consumption_potential): - # Set to current - prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current - prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current + # if pd.isnull(prepared_epc.heating_cost_current): + # # This is a basic assumption as an average + # appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( + # total_floor_area=prepared_epc.total_floor_area + # ) * AnnualBillSavings.ELECTRICITY_PRICE_CAP + # heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost + # prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value + # prepared_epc.heating_cost_current = heating_cleaned_value + # + # if pd.isnull(prepared_epc.hot_water_cost_current): + # # This is a basic assumption as an average + # prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST + # prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST + # + # if pd.isnull(prepared_epc.energy_consumption_potential): + # # Set to current + # prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current + # prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current return prepared_epc @@ -1281,6 +1277,10 @@ async def model_engine(body: PlanTriggerRequest): ) property_value_increase_ranges[p.id] = 
valuations + property_plan_data = db_funcs.recommendations_functions.prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations + ) + # TODO - this is not right, especially if the existing run failed if p.is_new: property_details_epc = p.get_property_details_epc( @@ -1300,23 +1300,8 @@ async def model_engine(body: PlanTriggerRequest): if not recommendations_to_upload: continue - new_plan_id = db_funcs.recommendations_functions.create_plan(session, { - "portfolio_id": body.portfolio_id, - "property_id": p.id, - "scenario_id": scenario_id, - "is_default": True if p.is_new else False, - "name": body.scenario_name, - "valuation_increase_lower_bound": ( - valuations["lower_bound_increased_value"] - valuations["current_value"] - ), - "valuation_increase_upper_bound": ( - valuations["upper_bound_increased_value"] - valuations["current_value"] - ), - "valuation_increase_average": ( - valuations["average_increased_value"] - valuations["current_value"] - ), - "plan_type": eco_packages.get(p.id, (None, None, None))[2] - }) + + new_plan_id = db_funcs.recommendations_functions.create_plan(session, plan=property_plan_data) db_funcs.recommendations_functions.upload_recommendations( session, recommendations_to_upload, p.id, new_plan_id From 4e55f4cba3ce9678665a047d837b04a6b6952278 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Dec 2025 17:38:34 +0000 Subject: [PATCH 109/202] handling find my epc missing recommendation --- etl/find_my_epc/RetrieveFindMyEpc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index e0370725..eb330948 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -744,6 +744,7 @@ class RetrieveFindMyEpc: "Solar photovoltaics panels, 25% of roof area": ["solar_pv"], 'Air or ground source heat pump': ["air_source_heat_pump"], "Add PV Battery": ["solar_pv_battery"], + 
"Add PV diverter": ["solar_pv_diverter"], # Don't have a recommendation yet } survey = True From c20b26881de19f7781dc33b77b296450f6b1c12a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Dec 2025 18:55:27 +0000 Subject: [PATCH 110/202] fixed stupid bug converting db data to float --- .../app/db/functions/recommendations_functions.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 542dde93..dfdd153e 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -59,14 +59,14 @@ def prepare_plan_data( "valuation_increase_average": ( valuations["average_increased_value"] - valuations["current_value"] ), - "post_sap_points": new_sap_points, + "post_sap_points": float(new_sap_points), "post_epc_rating": new_epc, - "post_co2_emissions": post_co2_emissions, - "co2_savings": co2_savings, - "post_energy_bill": post_energy_bill, - "energy_bill_savings": energy_bill_savings, - "post_energy_consumption": post_energy_consumption, - "energy_consumption_savings": energy_consumption_savings, + "post_co2_emissions": float(post_co2_emissions), + "co2_savings": float(co2_savings), + "post_energy_bill": float(post_energy_bill), + "energy_bill_savings": float(energy_bill_savings), + "post_energy_consumption": float(post_energy_consumption), + "energy_consumption_savings": float(energy_consumption_savings), "valuation_post_retrofit": valuation_post_retrofit, "valuation_increase": valuation_increase, "plan_type": eco_packages.get(p.id, (None, None, None))[2] From d4a45a5ccebb0fead68364bdf9cfad2a6a566fae Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 8 Dec 2025 18:45:46 +0000 Subject: [PATCH 111/202] added additional fields to db and fixed flat roof recommendations bug --- backend/Property.py | 24 ++++++- 
.../db/functions/recommendations_functions.py | 6 ++ backend/app/db/models/portfolio.py | 4 ++ backend/app/db/models/recommendations.py | 3 + recommendations/recommendation_utils.py | 66 +++++++++++-------- 5 files changed, 74 insertions(+), 29 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 6328ac8c..a9a1ac1b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -833,9 +833,29 @@ class Property: if self.current_energy_bill is None: raise ValueError("Current energy bill has not been set") + # IF we have a SAP05 overwrite, we pull out the relevant information + sap_05_overwritten = self.data.get("sap_05_overwritten", False) + + sap_05_score, sap_05_epc_rating = None, None + if sap_05_overwritten: + if not self.old_data: + # Trying to fetch SAP05 EPC but no data + raise ValueError("Trying to fetch SAP05 EPC but no old data available") + # We get the last rating from the old data + newest_old_epc = max(self.old_data, key=lambda d: pd.to_datetime(d["lodgement-date"])) + # Get the rating and score + sap_05_score = int(newest_old_epc["current-energy-efficiency"]) + sap_05_epc_rating = newest_old_epc["current-energy-rating"] + + lodgement_date = self.data["lodgement-date"] + # We check if the lodgement date is more than 10 years old + is_expired = (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650) + property_details_epc = { "property_id": self.id, "portfolio_id": portfolio_id, + "lodgement_date": datetime.fromisoformat(lodgement_date), + "is_expired": is_expired, "full_address": self.data["address"], "total_floor_area": float(self.data["total-floor-area"]), "walls": self.walls["clean_description"], @@ -891,7 +911,9 @@ class Property: "current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, "estimated": self.data.get("estimated", False), # We indicate if we've overwritten a SAP 05 EPC - "sap_05_overwritten": self.data.get("sap_05_overwritten", False), + "sap_05_overwritten": 
sap_05_overwritten, + "sap_05_score": sap_05_score, + "sap_05_epc_rating": sap_05_epc_rating, **self.current_energy_bill } diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index dfdd153e..f7b5f5eb 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -44,6 +44,10 @@ def prepare_plan_data( valuation_increase = valuations["average_increase"] valuation_post_retrofit = valuations["average_increased_value"] + # plan costing data + cost_of_works = sum([r["total"] for r in default_recommendations]) + contingency_cost = sum([r["contingency"] for r in default_recommendations]) + return { "portfolio_id": body.portfolio_id, "property_id": p.id, @@ -69,6 +73,8 @@ def prepare_plan_data( "energy_consumption_savings": float(energy_consumption_savings), "valuation_post_retrofit": valuation_post_retrofit, "valuation_increase": valuation_increase, + "cost_of_works": cost_of_works, + "contingency_cost": contingency_cost, "plan_type": eco_packages.get(p.id, (None, None, None))[2] } diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 7fec8c14..ea9f9976 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -137,6 +137,8 @@ class PropertyDetailsEpcModel(Base): property_id = Column(Integer, ForeignKey('property.id'), nullable=False) portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) full_address = Column(Text) + lodgement_date = Column(DateTime) + is_expired = Column(Boolean) total_floor_area = Column(Float) walls = Column(Text) walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5')) @@ -176,6 +178,8 @@ class PropertyDetailsEpcModel(Base): current_energy_demand_heating_hotwater = Column(Float) estimated = Column(Boolean, default=False) sap_05_overwritten = Column(Boolean, default=False) + sap_05_score = 
Column(Integer) + sap_05_epc_rating = Column(Enum(Epc)) # Include estimates for energy bills, across the different types of energy heating_cost_current = Column(Float) hot_water_cost_current = Column(Float) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 4c02268d..800596ec 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -89,6 +89,9 @@ class Plan(Base): energy_consumption_savings = Column(Float) valuation_post_retrofit = Column(Float) valuation_increase = Column(Float) + # Financial metrics, excluding funding + cost_of_works = Column(Float) + contingency_cost = Column(Float) class PlanRecommendations(Base): diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 6acc04f9..adbeecf5 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -239,42 +239,52 @@ def get_wall_u_value( return float(mapped_value) +def _try_convert_to_int(value): + try: + return int(value) + except (TypeError, ValueError): + return None + + def extract_thickness(thickness, is_roof_room, is_at_rafters, is_loft, is_flat): + thickness_map = { + "below average": "50", + "average": "100", + "above average": "150", + "none": "0", + } + + # Normalise none value early + if thickness is None: + thickness = "none" + if is_roof_room or is_at_rafters: - # TODO: We get None instead of a string none, this should be fixed - if thickness is None: - thickness = "none" + + int_thickness = _try_convert_to_int(thickness) + if int_thickness is not None: + return int_thickness # We re-map the thickness - thickness_map = { - "below average": "50", - "average": "100", - "above average": "150", - "none": "0", - } - thickness = thickness_map[thickness] + + thickness = thickness_map.get(thickness) + if thickness is None: + return None + + return int(thickness) if is_flat: - try: - thickness = int(thickness) - return 
thickness - except (TypeError, ValueError): - # If thickness is not a valid number (could be a string or None), return None - return None + return _try_convert_to_int(thickness) - if thickness in ["below average", "average", "above average", "none", None] or ( - not is_loft and not is_roof_room and not is_at_rafters + # Thicknes will never be none + if thickness in thickness_map or ( + not (is_loft or is_roof_room or is_at_rafters) ): return None - elif thickness.endswith("+"): - thickness = int(thickness[:-1]) - return thickness - else: - try: - thickness = int(thickness) - return thickness - except ValueError: - # If thickness is not a valid number (could be a string or None), return None - return None + + if isinstance(thickness, str) and str(thickness).endswith("+"): + return _try_convert_to_int(thickness[:-1]) + + # final attempt + return _try_convert_to_int(thickness) def get_u_value_from_s9( From 85b05563bae875366214817e054016ae824a5233 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 8 Dec 2025 19:52:59 +0000 Subject: [PATCH 112/202] allow optional contingency --- backend/app/db/functions/recommendations_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index f7b5f5eb..9235e34f 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -46,7 +46,7 @@ def prepare_plan_data( # plan costing data cost_of_works = sum([r["total"] for r in default_recommendations]) - contingency_cost = sum([r["contingency"] for r in default_recommendations]) + contingency_cost = sum([r.get("contingency", 0) for r in default_recommendations]) return { "portfolio_id": body.portfolio_id, From c7b3a6af9bc566a1a5431cf267e3084a3ff29eb1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 8 Dec 2025 20:16:07 +0000 Subject: [PATCH 113/202] fixing typing 
issue --- backend/app/db/functions/recommendations_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 9235e34f..57ccf65c 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -73,8 +73,8 @@ def prepare_plan_data( "energy_consumption_savings": float(energy_consumption_savings), "valuation_post_retrofit": valuation_post_retrofit, "valuation_increase": valuation_increase, - "cost_of_works": cost_of_works, - "contingency_cost": contingency_cost, + "cost_of_works": float(cost_of_works), + "contingency_cost": float(contingency_cost), "plan_type": eco_packages.get(p.id, (None, None, None))[2] } From 0b026c0c4c3050b9898b4b8dc6912c14137a1c0d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 8 Dec 2025 22:35:21 +0000 Subject: [PATCH 114/202] Handled windows edge case --- backend/engine/engine.py | 25 +++++++++++++++++++++++ recommendations/WindowsRecommendations.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 2427ca8a..8410024d 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -405,6 +405,29 @@ def check_duplicate_uprns(plan_input): return True +def check_duplicate_property_ids(input_properties): + """ + Simple function to check if the input data contains duplicated property IDs. This will happen in very rare + cases where we have properties across different servers, where the input UPRN is possibly incorrect and we + find the right property via an address search, instead of a UPRN search and so we end up with the same property + twice. 
+ :param input_properties: + :return: + """ + + input_property_ids = [x.id for x in input_properties] + + if input_property_ids: + # Check for dupes + if len(input_property_ids) != len(set(input_property_ids)): + # Find the duplicate property IDs + duplicates = set([x for x in input_property_ids if input_property_ids.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate property IDs in the input data: {duplicates}") + + return True + + def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): """ Placeholder cleaning function to handle edge cases where we have missing data for @@ -780,6 +803,8 @@ async def model_engine(body: PlanTriggerRequest): if not input_properties: return Response(status_code=204) + check_duplicate_property_ids(input_properties) + # We check if we have inspections data and store it in the database if so. We'll update or create # aginst each property if if inspections_map: diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index bc5e6066..a5561d20 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -190,7 +190,7 @@ class WindowsRecommendations: raise ValueError("Invalid glazing type - implement me") if self.property.data["windows-energy-eff"] == "Very Good": - raise ValueError("Very Good energy efficiency is not supported") + windows_energy_eff = "Very Good" # For post 2002 windows, the energy efficiency is "Good" and so for the simulation, we simulate with "Good" From 8ed1d3b9bd0f8d765415310a3229103be1e99c5f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 9 Dec 2025 23:17:36 +0000 Subject: [PATCH 115/202] implemented simple scoring model for battery SAP improvement --- backend/app/BatterySapScorer.py | 29 +++++++++++++ etl/battery_model/train.py | 62 ++++++++++++++++++++++++++++ sfr/principal_pitch/2_export_data.py | 34 ++++++++++++--- 3 files changed, 119 insertions(+), 6 deletions(-) create 
mode 100644 backend/app/BatterySapScorer.py create mode 100644 etl/battery_model/train.py diff --git a/backend/app/BatterySapScorer.py b/backend/app/BatterySapScorer.py new file mode 100644 index 00000000..59462677 --- /dev/null +++ b/backend/app/BatterySapScorer.py @@ -0,0 +1,29 @@ +import numpy as np + + +class BatterySAPScorer: + """ + Lightweight production scorer — no sklearn dependency. + Uses hard-coded coefficients discovered offline. The code for discovering the coefficients + can be found in etl/battery_model/train.py + """ + + INTERCEPT = 10.310168559226678 + COEF_STARTING_SAP = -0.16120648633993315 + COEF_PV_SIZE = 1.0500492005420736 + + @classmethod + def score(cls, starting_sap, pv_size): + """ + heating_system: string used to infer is_electric + """ + + sap_uplift = ( + cls.INTERCEPT + + cls.COEF_STARTING_SAP * starting_sap + + cls.COEF_PV_SIZE * pv_size + ) + + # Round + clamp to [0,5] + sap_uplift = int(np.round(np.clip(sap_uplift, 0, 5))) + return sap_uplift diff --git a/etl/battery_model/train.py b/etl/battery_model/train.py new file mode 100644 index 00000000..086f68cb --- /dev/null +++ b/etl/battery_model/train.py @@ -0,0 +1,62 @@ +import pandas as pd +from sklearn.linear_model import Ridge + + +class SAPUpliftTrainer: + """ + Offline training class — discovers SAP uplift model coefficients. 
+ """ + + def __init__(self, alpha=1.0): + self.alpha = alpha + self.model = Ridge(alpha=self.alpha) + self.feature_names = ["starting SAP", "PV Array size"] + + def prepare_data(self, df): + df = df.copy() + # df["is_electric"] = df["heating"].str.contains( + # "Electric", case=False, na=False + # ).astype(int) + X = df[self.feature_names] + y = df["SAP points"] + return X, y + + def fit(self, df): + X, y = self.prepare_data(df) + self.model.fit(X, y) + + def coefficients(self): + return { + "intercept": float(self.model.intercept_), + **{ + name: float(coef) + for name, coef in zip(self.feature_names, self.model.coef_) + } + } + + def export_runtime_config(self): + """ + Returns a dict suitable for copy-pasting into the runtime scoring class. + """ + coefs = self.coefficients() + return { + "intercept": coefs["intercept"], + "coef_starting_sap": coefs["starting SAP"], + "coef_pv_size": coefs["PV Array size"], + # "coef_is_electric": coefs["is_electric"], + } + + +# The training data can be found in the Domna sharepoint in Product Development > Solar Battery Recommendations +df = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/SAP Movement data(Sheet1).csv") + +trainer = SAPUpliftTrainer(alpha=1.0) +trainer.fit(df) + +print(trainer.coefficients()) +print(trainer.export_runtime_config()) + +# Last updated: 9th December 2025 +# Coefficients: +# {'intercept': 10.310168559226678, 'starting SAP': -0.16120648633993315, 'PV Array size': 1.0500492005420736} +# The code for scoring with this model can be found in backend/app/BatterySapScorer.py diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 79238273..f6618f22 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -11,8 +11,8 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 221 -SCENARIOS = [427] +PORTFOLIO_ID = 388 +SCENARIOS = [803] def 
get_data(portfolio_id, scenario_ids): @@ -95,6 +95,18 @@ post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +# Find dupes by property id and measure type +dupes = recommended_measures_df.duplicated( + subset=["property_id", "measure_type"], keep=False +) +dupe_df = recommended_measures_df[dupes] + +if dupe_df.shape: + # Drop dupes - happened due to a funny bug + recommended_measures_df = recommended_measures_df.drop_duplicates( + subset=["property_id", "measure_type"], keep='first' + ) + recommendations_measures_pivot = recommended_measures_df.pivot( index='property_id', columns='measure_type', @@ -131,10 +143,19 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3 # asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') asset_list = read_excel_from_s3( - bucket_name="retrofit-plan-inputs-dev", file_key='8/221/20250722T202328736Z/asset_list.xlsx', - header_row=0, sheet_name="320 - edited" + bucket_name="retrofit-plan-inputs-dev", file_key='2/388/20251208T203603925Z/asset_list.xlsx', + header_row=0, sheet_name="Standardised Asset List" ) asset_list = pd.DataFrame(asset_list) +asset_list = asset_list.rename( + columns={ + "postcode": "domna_postcode" + } +) +if "domna_full_address": + # For Peabody + asset_list["domna_full_address"] = asset_list["domna_address_1"] + asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) df["uprn"] = df["uprn"].astype(str) @@ -179,9 +200,10 @@ asset_list = asset_list.merge( on="uprn" ) -# For exporting NCHA +# For exporting asset_list.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/320 Portfolio/asset_list_epc_b.xlsx", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + 
"Project/20251209_sample_package_data.xlsx", index=False ) From 8745dffd0a7a8b50194404a6697efbb8fb9f1578 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Dec 2025 12:33:22 +0000 Subject: [PATCH 116/202] Added solar recommendations - needs some testing --- backend/Property.py | 16 +++++++-- backend/app/BatterySapScorer.py | 1 + backend/engine/engine.py | 19 +++++++---- etl/epc/DataProcessor.py | 4 +-- etl/epc/Dataset.py | 4 ++- recommendations/WindowsRecommendations.py | 4 +++ .../optimiser/funding_optimiser.py | 22 +++++++++++++ .../optimiser/optimiser_functions.py | 33 +++++++++++++------ 8 files changed, 81 insertions(+), 22 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index a9a1ac1b..cbcb9aa3 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -301,9 +301,18 @@ class Property: if k in fixed_data_col_names } - difference_record = self.epc_record.create_EPCDifferenceRecord( - self.epc_record, fixed_data - ) + difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data) + + # We have rare cases where entire description columns are missing. EpcRecords will convert this to None. 
+ # Due to the sensitivity of the EPCDifferenceRecord creation to missing data, we will fill in these missing + # descriptions with and empty string, for the purpose of creating this scoring record + description_cols = [ + x for x in difference_record.difference_record if + "_description" in x and difference_record.difference_record[x] is None + ] + if description_cols: + for col in description_cols: + difference_record.difference_record[col] = "" self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup) @@ -1228,6 +1237,7 @@ class Property: "biomass": "Smokeless Fuel", "electricity": "Electricity", "biogas": "Smokeless Fuel", + "heat network": "Natural Gas (Community Scheme)", } self.heating_energy_source = list({ diff --git a/backend/app/BatterySapScorer.py b/backend/app/BatterySapScorer.py index 59462677..f5e485c4 100644 --- a/backend/app/BatterySapScorer.py +++ b/backend/app/BatterySapScorer.py @@ -6,6 +6,7 @@ class BatterySAPScorer: Lightweight production scorer — no sklearn dependency. Uses hard-coded coefficients discovered offline. The code for discovering the coefficients can be found in etl/battery_model/train.py + We're only concerned with SAP, as we already have a method for carbon and bill savings. 
""" INTERCEPT = 10.310168559226678 diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 8410024d..b04cb2f5 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -15,7 +15,7 @@ from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response -from backend.ml_models.AnnualBillSavings import AnnualBillSavings +from backend.app.BatterySapScorer import BatterySAPScorer from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine @@ -1100,11 +1100,10 @@ async def model_engine(body: PlanTriggerRequest): scheme = "none" funded_measures, solution = [], [] ( - project_funding, total_uplift, full_project_score, partial_project_score, uplift_project_score - ) = 0, 0, 0, 0, 0 + project_funding, total_uplift, full_project_score, partial_project_score, uplift_project_score, + battery_sap_score + ) = 0, 0, 0, 0, 0, 0 else: - - # If the solution isn't eligible, we can't really consider it solutions = solutions[ (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") ] @@ -1136,6 +1135,8 @@ async def model_engine(body: PlanTriggerRequest): partial_project_score = optimal_solution["partial_project_score"] # This is the uplift score ABS uplift_project_score = optimal_solution["total_uplift_score"] + # This is the SAP score associated to a battery + battery_sap_score = optimal_solution["battery_sap_uplift"] else: # We optimise and then we determine eligibility for funding, based on the measures selected optimiser = ( @@ -1146,6 +1147,8 @@ async def model_engine(body: PlanTriggerRequest): optimiser.setup() optimiser.solve() solution = optimiser.solution + gain = optimiser.solution_gain + post_sap = int(p.data["current-energy-efficiency"]) + gain recommendation_types = [] for measures in input_measures: @@ -1193,6 +1196,10 @@ async def 
model_engine(body: PlanTriggerRequest): full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs partial_project_score = funding.partial_project_abs uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift + pv_size = next( + (m["array_size"] for m in solution if m["type"] == "solar_pv"), 0 + ) + battery_sap_score = BatterySAPScorer.score(starting_sap=post_sap, pv_size=pv_size) selected = {r["id"] for r in solution} @@ -1206,7 +1213,7 @@ async def model_engine(body: PlanTriggerRequest): selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) # Final flattening recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - p.id, recommendations, selected + p.id, recommendations, selected, battery_sap_score ) # TODO: functionise diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 5e5d0872..da83eb05 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -388,7 +388,7 @@ class EPCDataProcessor: has_missings = pd.isnull(self.data[col]).sum() while has_missings: self.data = apply_clean( - data=self.data, matching_columns=matching_columns[0 : to_index + 1] + data=self.data, matching_columns=matching_columns[0: to_index + 1] ) has_missings = pd.isnull(self.data[col]).sum() @@ -705,7 +705,7 @@ class EPCDataProcessor: [ violation_uprn_missing, violation_old_lodgment_date, - violation_invalid_transaction_type, + # violation_invalid_transaction_type, violation_ignored_floor_level, violation_rdsap_score_above_max, violation_missing_windows_description, diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 2e5646ac..8fa3e13c 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -840,7 +840,9 @@ class TrainingDataset(BaseDataset): if len(missings) == 0: return - # Make sure they are all efficiency columns + # + + # Make sure they are all efficiency columns if 
any(~missings.index.str.contains("energy_eff")): raise ValueError("Non efficiency columns are missing") diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index a5561d20..8bdab5d1 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -52,6 +52,10 @@ class WindowsRecommendations: # We don't make any recommendations in this case. The property already has outstanding glazing return + # We handle the rare case of not having any windows data + if self.property.windows["clean_description"] is None: + return + if self.property.windows["has_glazing"] & ( self.property.windows["glazing_coverage"] == "full" ): diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 925a818f..0aa69f39 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -18,6 +18,7 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser from utils.logger import setup_logger from backend.Funding import Funding +from backend.app.BatterySapScorer import BatterySAPScorer logger = setup_logger() @@ -239,6 +240,10 @@ def _move_hhrsh_to_unfunded(picked, unfunded_picked, needs_pre_eco_hhrsh_upgrade return picked, unfunded_picked +def has_battery(items): + return any(x.get("has_battery", False) for x in items) + + def optimise_with_funding_paths( p, input_measures, housing_type, funding: Funding, budget=None, target_gain=None, work_package=None ): @@ -519,6 +524,23 @@ def optimise_with_funding_paths( solutions["starting_sap"] = int(p.data["current-energy-efficiency"]) solutions["floor_area"] = p.floor_area solutions["ending_sap"] = solutions["starting_sap"] + solutions["total_gain"] + # We flag projects that are including batteries + solutions["has_battery"] = solutions["items"].apply(has_battery) + solutions["array_size"] 
= solutions["items"].apply( + lambda x: sum(float(y["array_size"]) for y in x if "array_size" in y) + ) + + # For properties that are including batteries, we need to adjust the starting SAP to include the battery SAP uplift + # Note: We score on ending sap, as the battery SAP uplift is based on the ending SAP after fabric/heat/solar + # upgrades of each package is applied + solutions["battery_sap_uplift"] = solutions.apply( + lambda x: BatterySAPScorer.score(starting_sap=x["ending_sap"], pv_size=x["array_size"]) + if x["has_battery"] else 0, + axis=1 + ) + # We add this on to ending SAP + solutions["ending_sap"] = solutions["ending_sap"] + solutions["battery_sap_uplift"] + solutions["starting_band"] = (solutions["starting_sap"] + solutions["already_installed_gain"]).apply( funding.get_sap_band ) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 4a8c96da..d7705456 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -75,8 +75,8 @@ def prepare_input_measures( continue # Filter out solar PV with batteries - if recs[0]["type"] == "solar_pv": - recs = [r for r in recs if ~r["has_battery"]] + # if recs[0]["type"] == "solar_pv": + # recs = [r for r in recs if ~r["has_battery"]] # Only include measures with non-negative cost savings if eco_measures: @@ -123,6 +123,14 @@ def prepare_input_measures( else rec["measure_type"] ) + array_size = 0 + if rec["measure_type"] == "solar_pv": + # Grab the parts + solar_part = next( + (part for part in rec["parts"] if part["type"] == "solar_pv"), + ) + array_size = solar_part["size"] + # We also include the innovation uplift to_append.append( { @@ -136,6 +144,8 @@ def prepare_input_measures( "partial_project_score": rec["partial_project_score"], "uplift_project_score": rec["uplift_project_score"], "already_installed": rec.get("already_installed", False), + "has_battery": rec.get("has_battery", False), + 
"array_size": array_size, } ) @@ -331,7 +341,7 @@ def add_best_practice_measures(property_id, solution, recommendations, selected) return selected -def flatten_recommendations_with_defaults(property_id, recommendations, selected): +def flatten_recommendations_with_defaults(property_id, recommendations, selected, battery_sap_score=0): """ Flattens nested recommendation lists for a property and marks which recommendations were selected. @@ -349,6 +359,8 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected Each value is a list of lists (grouped by measure type). selected : set Set of selected recommendation IDs. + battery_sap_score: int, optional + SAP score uplift from battery storage, if applicable. Returns ------- @@ -356,13 +368,14 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected A flattened list of recommendation dicts for the given property, each with an added `default` field. """ - final_recommendations = [ - [ - {**rec, "default": rec["recommendation_id"] in selected} - for rec in recommendations_by_type - ] - for recommendations_by_type in recommendations[property_id] - ] + + final_recommendations = [] + for recommendations_by_type in recommendations[property_id]: + for rec in recommendations_by_type: + rec_copy = {**rec, "default": rec["recommendation_id"] in selected} + if rec_copy.get("has_battery", False): + rec_copy["sap_points"] += battery_sap_score + final_recommendations.append(rec_copy) # Flatten the nested list of lists into a single list return [rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type] From f43b3edd01ea96884082fd07c4a14933cd4d4b6e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Dec 2025 17:56:53 +0000 Subject: [PATCH 117/202] commiting missing stuff --- backend/app/BatterySapScorer.py | 4 +-- backend/engine/engine.py | 7 ++++- etl/customers/lincs_rural/prepare_data.py | 26 +++++++++++++++++++ 
.../optimiser/funding_optimiser.py | 7 ++--- .../optimiser/optimiser_functions.py | 5 +++- 5 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 etl/customers/lincs_rural/prepare_data.py diff --git a/backend/app/BatterySapScorer.py b/backend/app/BatterySapScorer.py index f5e485c4..923c5498 100644 --- a/backend/app/BatterySapScorer.py +++ b/backend/app/BatterySapScorer.py @@ -25,6 +25,6 @@ class BatterySAPScorer: + cls.COEF_PV_SIZE * pv_size ) - # Round + clamp to [0,5] - sap_uplift = int(np.round(np.clip(sap_uplift, 0, 5))) + # Round + clamp to [1,5] - there are only a small number of cases with 0 points + sap_uplift = int(np.round(np.clip(sap_uplift, 1, 5))) return sap_uplift diff --git a/backend/engine/engine.py b/backend/engine/engine.py index b04cb2f5..1f2b3976 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1136,7 +1136,12 @@ async def model_engine(body: PlanTriggerRequest): # This is the uplift score ABS uplift_project_score = optimal_solution["total_uplift_score"] # This is the SAP score associated to a battery - battery_sap_score = optimal_solution["battery_sap_uplift"] + pv_size = next( + (m["array_size"] for m in optimal_solution["items"] if m["type"] == "solar_pv"), 0 + ) + battery_sap_score = BatterySAPScorer.score( + starting_sap=optimal_solution["ending_sap"], pv_size=pv_size + ) else: # We optimise and then we determine eligibility for funding, based on the measures selected optimiser = ( diff --git a/etl/customers/lincs_rural/prepare_data.py b/etl/customers/lincs_rural/prepare_data.py new file mode 100644 index 00000000..db7a9087 --- /dev/null +++ b/etl/customers/lincs_rural/prepare_data.py @@ -0,0 +1,26 @@ +""" +Rough script to prepare the data for Lincs Rural project +""" +import pandas as pd +from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc + +data = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx", + sheet_name="PROPERTY EPC 
RATINGS" +) + +# We have property RRNs - we need UPRN + +for _, x in data.iterrows(): + rrn = x["EPC Ref."] + + # Fetch from find my epc + retriever = RetrieveFindMyEpc( + address="", + postcode="", + rrn=rrn, + address_postal_town="", + sap_rating=x["Actual"] + ) + + find_epc_data = retriever.retrieve_all_find_my_epc_data() diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 0aa69f39..a8b998ae 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -533,18 +533,19 @@ def optimise_with_funding_paths( # For properties that are including batteries, we need to adjust the starting SAP to include the battery SAP uplift # Note: We score on ending sap, as the battery SAP uplift is based on the ending SAP after fabric/heat/solar # upgrades of each package is applied + # NB: The battery SAP uplift is used to potentially prioritise packages that include batteries, it does NOT impact + # the eventual SAP score at this point. Once the package is included, we'll re-calculate battery SAP score outside + # of this. 
This is because solutions["battery_sap_uplift"] = solutions.apply( lambda x: BatterySAPScorer.score(starting_sap=x["ending_sap"], pv_size=x["array_size"]) if x["has_battery"] else 0, axis=1 ) - # We add this on to ending SAP - solutions["ending_sap"] = solutions["ending_sap"] + solutions["battery_sap_uplift"] solutions["starting_band"] = (solutions["starting_sap"] + solutions["already_installed_gain"]).apply( funding.get_sap_band ) - solutions["ending_band"] = solutions["ending_sap"].apply(funding.get_sap_band) + solutions["ending_band"] = (solutions["ending_sap"] + solutions["battery_sap_uplift"]).apply(funding.get_sap_band) solutions["floor_area_band"] = solutions["floor_area"].apply(funding.get_floor_area_band) solutions["project_score"] = solutions.apply( lambda x: funding._calculate_full_project_abs( diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index d7705456..0eec35dc 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -371,11 +371,14 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected final_recommendations = [] for recommendations_by_type in recommendations[property_id]: + recs_by_type = [] for rec in recommendations_by_type: rec_copy = {**rec, "default": rec["recommendation_id"] in selected} if rec_copy.get("has_battery", False): rec_copy["sap_points"] += battery_sap_score - final_recommendations.append(rec_copy) + recs_by_type.append(rec_copy) + + final_recommendations.append(recs_by_type) # Flatten the nested list of lists into a single list return [rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type] From 110cb8070ce78823d2bd9edcca5d5d95222a9da4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Dec 2025 18:42:25 +0000 Subject: [PATCH 118/202] [Cincreased concurrency of backend --- etl/customers/lincs_rural/prepare_data.py | 71 
++++++++++++++++--- .../data_cleanse.py | 6 ++ etl/find_my_epc/RetrieveFindMyEpc.py | 13 ++-- serverless.yml | 2 +- 4 files changed, 74 insertions(+), 18 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py diff --git a/etl/customers/lincs_rural/prepare_data.py b/etl/customers/lincs_rural/prepare_data.py index db7a9087..675179a8 100644 --- a/etl/customers/lincs_rural/prepare_data.py +++ b/etl/customers/lincs_rural/prepare_data.py @@ -1,8 +1,15 @@ """ Rough script to prepare the data for Lincs Rural project """ +from tqdm import tqdm import pandas as pd +import os +from dotenv import load_dotenv from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +from backend.SearchEpc import SearchEpc + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") data = pd.read_excel( "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx", @@ -11,16 +18,58 @@ data = pd.read_excel( # We have property RRNs - we need UPRN -for _, x in data.iterrows(): - rrn = x["EPC Ref."] +standardised_ara_list = [] +missed = [] +for _, x in tqdm(data.iterrows(), total=len(data)): + try: + rrn = x["EPC Ref."] - # Fetch from find my epc - retriever = RetrieveFindMyEpc( - address="", - postcode="", - rrn=rrn, - address_postal_town="", - sap_rating=x["Actual"] - ) + # Fetch from find my epc + retriever = RetrieveFindMyEpc( + address="", + postcode="", + rrn=rrn, + address_postal_town="", + ) - find_epc_data = retriever.retrieve_all_find_my_epc_data() + find_epc_data = retriever.retrieve_newest_find_my_epc_data(rrn=rrn) + + # Find the UPRN + epc_searcher = SearchEpc( + address1=str(find_epc_data["address1"]), + postcode=str(find_epc_data["postcode"]), + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=False, + full_address=",".join([find_epc_data["address1"], find_epc_data["address2"]]), + max_retries=5, + ) + epc_searcher.find_property(skip_os=True) + + # 
Append in format we need + # Stuff we need: + standardised_ara_list.append( + { + "landlord_property_id": x["Property Ref."], + "landlord_property_type": epc_searcher.newest_epc.get("property-type"), + "landlord_built_form": epc_searcher.newest_epc.get("built-form"), + "landlord_heating_system": epc_searcher.newest_epc.get("mainheat-description", ""), + "epc_os_uprn": epc_searcher.newest_epc.get("uprn"), + "domna_property_id": x["Property Ref."], + "domna_full_address": epc_searcher.newest_epc.get( + "address", ", ".join([ + find_epc_data["address1"], + find_epc_data["address2"], + ]) + ), + } + ) + except Exception as e: + missed.append({ + "property_ref": x["Property Ref."], + "rrn": x["EPC Ref."], + "error": str(e) + }) + +missed_df = pd.DataFrame(missed) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py new file mode 100644 index 00000000..a1be533d --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py @@ -0,0 +1,6 @@ +""" +We have found, within the Peabody data, a large volume of properties with missing and incorrects +UPRNS and incorrect address data. We want to flag these records and also find missings where we can + +We also have duplicate UPRNS that should be flagged +""" diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index eb330948..cf6659f9 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -465,12 +465,13 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) - if current_sap != self.sap_rating: - # This means we likely have the wrong data. If we are in this scenario, we return nothing - return { - "epc_certificate": None, - "page_source": None, - } + if self.sap_rating: + if current_sap != self.sap_rating: + # This means we likely have the wrong data. 
If we are in this scenario, we return nothing + return { + "epc_certificate": None, + "page_source": None, + } # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) diff --git a/serverless.yml b/serverless.yml index d2d8f50a..38d8da89 100644 --- a/serverless.yml +++ b/serverless.yml @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 5 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 10 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: From 8f7e9e0bdece3a0073aff017d32ebcfa3d6050a1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Dec 2025 19:17:19 +0000 Subject: [PATCH 119/202] simplified fuel code --- asset_list/app.py | 34 +++++++++++++++++++++++ backend/Property.py | 24 +++------------- backend/SearchEpc.py | 2 +- etl/customers/lincs_rural/prepare_data.py | 16 +++++++++++ 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index cbb2cd93..dfd7aa46 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,40 @@ def app(): Property UPRN """ + # Lambeth: + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th" + data_filename = "lambeth_sw2_leigham court estate.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + address1_column = "Address" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["Address"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + 
outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # Maps addresses to uprn in problematic cases manual_uprn_map = {} diff --git a/backend/Property.py b/backend/Property.py index cbcb9aa3..31991702 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1416,30 +1416,14 @@ class Property: if not self.is_ashp_valid(measures=["air_source_heat_pump"]): return self.current_energy_consumption - # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain - remap_fuel_sources = [ - "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel", - "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal", - "Natural Gas (Community Scheme)" - ] - - heating_energy_source = self.heating_energy_source - hot_water_energy_source = self.hot_water_energy_source heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"] hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"] - if (heating_energy_source not in remap_fuel_sources) or ( - hot_water_energy_source not in remap_fuel_sources + ["Electricity + Solar Thermal"] - ): - raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type") + # Adjust the heating consumption to reflect the expected efficiency of an ASHP - broadly 3.0 COP + heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100) - if heating_energy_source in remap_fuel_sources: - # Adjust the heating consumption to reflect the expected efficiency of an ASHP - heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100) - - if hot_water_energy_source in remap_fuel_sources: - # Adjust the hot water consumption to reflect 
the expected efficiency of an ASHP - hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100) + # Adjust the hot water consumption to reflect the expected efficiency of an ASHP + hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100) electric_consumption = ( heating_consumption + diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index a193a65f..cb465239 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -563,7 +563,7 @@ class SearchEpc: uprn = hash(self.address1 + self.postcode) if self.fast: - return newest_epc, [], {}, "", "", None + return newest_epc, [], {}, "", "", None, "" # Retrieve postcode and address address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc) diff --git a/etl/customers/lincs_rural/prepare_data.py b/etl/customers/lincs_rural/prepare_data.py index 675179a8..0a3be7fe 100644 --- a/etl/customers/lincs_rural/prepare_data.py +++ b/etl/customers/lincs_rural/prepare_data.py @@ -52,6 +52,8 @@ for _, x in tqdm(data.iterrows(), total=len(data)): standardised_ara_list.append( { "landlord_property_id": x["Property Ref."], + "domna_address_1": find_epc_data["address1"], + "postcode": find_epc_data["postcode"], "landlord_property_type": epc_searcher.newest_epc.get("property-type"), "landlord_built_form": epc_searcher.newest_epc.get("built-form"), "landlord_heating_system": epc_searcher.newest_epc.get("mainheat-description", ""), @@ -73,3 +75,17 @@ for _, x in tqdm(data.iterrows(), total=len(data)): }) missed_df = pd.DataFrame(missed) + +# Store +standardised_ara_df = pd.DataFrame(standardised_ara_list) +standardised_ara_df.to_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx", + index=False, + sheet_name="Standardised Asset List" +) +# Store missed +missed_df.to_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx", + index=False, + sheet_name="Missed Properties" +) From 
395ab0e083899403dad4978343936a4078315905 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Dec 2025 22:13:09 +0800 Subject: [PATCH 120/202] minor debugging --- asset_list/app.py | 87 +++++--- asset_list/mappings/built_form.py | 8 +- .../db/functions/recommendations_functions.py | 157 +++++++++++++-- backend/engine/engine.py | 4 +- backend/ml_models/AnnualBillSavings.py | 8 + backend/tests/test_integration.py | 188 ++++++++++-------- etl/customers/lincs_rural/get_missed.py | 47 +++++ .../{data_prep.py => a_data_prep.py} | 0 .../b_data_cleanse.py | 147 ++++++++++++++ .../c_finalised_modelling_data.py | 114 +++++++++++ .../data_cleanse.py | 6 - etl/epc/Dataset.py | 2 +- etl/webscrape/Zoopla.py | 152 +++++++------- sfr/principal_pitch/2_export_data.py | 19 +- 14 files changed, 722 insertions(+), 217 deletions(-) create mode 100644 etl/customers/lincs_rural/get_missed.py rename etl/customers/peabody/Nov 2025 Consulting Project/{data_prep.py => a_data_prep.py} (100%) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py delete mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py diff --git a/asset_list/app.py b/asset_list/app.py index dfd7aa46..3d8a0fae 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,25 +59,26 @@ def app(): Property UPRN """ - # Lambeth: - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th" - data_filename = "lambeth_sw2_leigham court estate.xlsx" + # Peabody data for cleaning + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation") + data_filename = "to_standardise_uprns.xlsx" sheet_name = "Sheet1" postcode_column = 'Postcode' - address1_column = "Address" + address1_column = "Address 1" address1_method = None fulladdress_column = None - 
address_cols_to_concat = ["Address"] + address_cols_to_concat = ["Address 1", "Address 2", "Address 3"] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None + landlord_property_type = "Type" + landlord_built_form = "Attachment" landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "row_id" + landlord_property_id = "Org Ref" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -93,6 +94,40 @@ def app(): asset_list_header = 0 landlord_block_reference = None + # Lambeth: + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th" + # data_filename = "lambeth_sw2_leigham court estate.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # address1_column = "Address" + # address1_method = None + # fulladdress_column = None + # address_cols_to_concat = ["Address"] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "row_id" + # landlord_sap = None + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = None + # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -230,22 +265,22 @@ def app(): ) # We now retrieve any failed properties - chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)] - 
epc_data_failed, _, _ = get_data( - df=chunk_failed, - row_id_name=asset_list.DOMNA_PROPERTY_ID, - uprn_column=AssetList.STANDARD_UPRN, - fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, - address1_column=AssetList.STANDARD_ADDRESS_1, - postcode_column=AssetList.STANDARD_POSTCODE, - property_type_column=AssetList.STANDARD_PROPERTY_TYPE, - built_form_column=AssetList.STANDARD_BUILT_FORM, - manual_uprn_map=manual_uprn_map, - epc_api_only=epc_api_only, - epc_auth_token=EPC_AUTH_TOKEN - ) - - epc_data_chunk.extend(epc_data_failed) + # chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)] + # epc_data_failed, _, _ = get_data( + # df=chunk_failed, + # row_id_name=asset_list.DOMNA_PROPERTY_ID, + # uprn_column=AssetList.STANDARD_UPRN, + # fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, + # address1_column=AssetList.STANDARD_ADDRESS_1, + # postcode_column=AssetList.STANDARD_POSTCODE, + # property_type_column=AssetList.STANDARD_PROPERTY_TYPE, + # built_form_column=AssetList.STANDARD_BUILT_FORM, + # manual_uprn_map=manual_uprn_map, + # epc_api_only=epc_api_only, + # epc_auth_token=EPC_AUTH_TOKEN + # ) + # + # epc_data_chunk.extend(epc_data_failed) # Append the failed data to the main data # Store the chunk locally as a csv @@ -422,3 +457,7 @@ def app(): if not asset_list.geographical_areas.empty: asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) + + # Store dupes + if not asset_list.duplicated_addresses.empty: + asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False) diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 2556d755..58686d6b 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -458,6 +458,12 @@ BUILT_FORM_MAPPINGS = { 'Maisonette: Detached: Mid Floor': 'detached', 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace', - 'House: EnclosedMidTerrace': 'enclosed mid-terrace' + 
'House: EnclosedMidTerrace': 'enclosed mid-terrace', + + 'EnclosedMidTerrace': 'enclosed mid-terrace', + 'EnclosedEndTerrace': 'enclosed end-terrace', + 'EndTerrace': 'end-terrace', + 'SemiDetached': 'semi-detached', + 'MidTerrace': 'mid-terrace' } diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 57ccf65c..14596749 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,4 +1,4 @@ -from sqlalchemy import insert, delete, text +from sqlalchemy import insert, delete, select from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( @@ -242,20 +242,26 @@ def chunked(iterable, size=100): yield iterable[i:i + size] +# def fast_delete_recommendations(session, chunk): +# placeholders = ",".join(["(:p{})".format(i) for i in range(len(chunk))]) +# params = {f"p{i}": chunk[i] for i in range(len(chunk))} +# +# sql = text(f""" +# WITH ids(property_id) AS ( +# VALUES {placeholders} +# ) +# DELETE FROM recommendation r +# USING ids +# WHERE r.property_id = ids.property_id; +# """) +# +# session.execute(sql, params, execution_options={"synchronize_session": False}) + def fast_delete_recommendations(session, chunk): - placeholders = ",".join(["(:p{})".format(i) for i in range(len(chunk))]) - params = {f"p{i}": chunk[i] for i in range(len(chunk))} - - sql = text(f""" - WITH ids(property_id) AS ( - VALUES {placeholders} - ) - DELETE FROM recommendation r - USING ids - WHERE r.property_id = ids.property_id; - """) - - session.execute(sql, params, execution_options={"synchronize_session": False}) + session.execute( + delete(Recommendation) + .where(Recommendation.property_id.in_(chunk)) + ) def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): @@ -362,11 +368,19 @@ def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): # 
-------------------------- # Recommendations (fast delete) # -------------------------- - rec_chunks = list(chunked(property_ids, batch_size)) + # rec_chunks = list(chunked(property_ids, batch_size * 5)) # larger chunks for fast delete + # total = len(rec_chunks) + # for i, chunk in enumerate(rec_chunks, start=1): + # print_progress("Deleting Recommendations", i, total) + # fast_delete_recommendations(session, chunk) + rec_chunks = list(chunked(recommendation_ids, batch_size)) total = len(rec_chunks) for i, chunk in enumerate(rec_chunks, start=1): print_progress("Deleting Recommendations", i, total) - fast_delete_recommendations(session, chunk) + session.execute( + delete(Recommendation) + .where(Recommendation.id.in_(chunk)) + ) # -------------------------- # Inspections @@ -412,3 +426,114 @@ def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): session.commit() print("Portfolio cleared.") + + +def clear_portfolio_in_batches( + session: Session, + portfolio_id: int, + property_batch_size: int = 10 +): + # Fetch all property IDs once + property_ids = [ + pid for (pid,) in + session.query(PropertyModel.id) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ] + + def delete_for_property_batch(prop_ids): + # ---------------------------- + # Recommendations → PlanRecommendations + # ---------------------------- + rec_subq = ( + select(Recommendation.id) + .where(Recommendation.property_id.in_(prop_ids)) + ) + + session.execute( + delete(PlanRecommendations) + .where(PlanRecommendations.recommendation_id.in_(rec_subq)) + ) + + session.execute( + delete(RecommendationMaterials) + .where(RecommendationMaterials.recommendation_id.in_(rec_subq)) + ) + + session.execute( + delete(Recommendation) + .where(Recommendation.property_id.in_(prop_ids)) + ) + + # ---------------------------- + # Inspections + # ---------------------------- + session.execute( + delete(InspectionModel) + .where(InspectionModel.property_id.in_(prop_ids)) + ) + + # 
---------------------------- + # Plans (scoped to these properties) + # ---------------------------- + plan_subq = ( + select(Plan.id) + .where(Plan.property_id.in_(prop_ids)) + ) + + session.execute( + delete(PlanRecommendations) + .where(PlanRecommendations.plan_id.in_(plan_subq)) + ) + + session.execute( + delete(FundingPackageMeasures) + .where( + FundingPackageMeasures.funding_package_id.in_( + select(FundingPackage.id) + .where(FundingPackage.plan_id.in_(plan_subq)) + ) + ) + ) + + session.execute( + delete(FundingPackage) + .where(FundingPackage.plan_id.in_(plan_subq)) + ) + + session.execute( + delete(Plan) + .where(Plan.id.in_(plan_subq)) + ) + + # ---------------------------- + # Property-scoped auxiliary tables + # ---------------------------- + session.execute( + delete(PropertyDetailsEpcModel) + .where(PropertyDetailsEpcModel.property_id.in_(prop_ids)) + ) + + session.execute( + delete(PropertyTargetsModel) + .where(PropertyTargetsModel.property_id.in_(prop_ids)) + ) + + # ---------------------------- + # Properties (last) + # ---------------------------- + session.execute( + delete(PropertyModel) + .where(PropertyModel.id.in_(prop_ids)) + ) + + # -------- BATCH DELETE LOOP -------- + property_chunks = list(chunked(property_ids, property_batch_size)) + total_batches = len(property_chunks) + + for i, prop_ids in enumerate(property_chunks, start=1): + print(f"Deleting batch {i}/{total_batches} ({len(prop_ids)} properties)") + delete_for_property_batch(prop_ids) + session.commit() + + print("Portfolio cleared in batches.") diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 1f2b3976..967d6b16 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -662,7 +662,9 @@ async def model_engine(body: PlanTriggerRequest): address1 = config.get("domna_address_1", None) address1 = str(int(address1)) if isinstance(address1, float) else str(address1) - full_address = config.get("domna_full_address") if body.file_format == 
"domna_asset_list" else None + full_address = config.get("domna_full_address", "") if body.file_format == "domna_asset_list" else None + if not isinstance(full_address, str): # Catch for when the full address is nan + full_address = None heating_system = parse_heating_system(config) associated_uprns = [] diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 27d1b5be..f04ee2f1 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -290,6 +290,14 @@ class AnnualBillSavings: # The solar thermal covers a % of the heating kwh, so we need to adjust the cost return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP + if fuel in ['Oil + Solar Thermal']: + # The solar thermal covers a % of the heating kwh, so we need to adjust the cost + price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze() + cost_per_kwh = cls.cost_per_kwh( + price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"] + ) + return (kwh / cop) * cost_per_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION + if fuel == "LPG + Solar Thermal": # The solar thermal covers a % of the heating kwh, so we need to adjust the cost price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "LPG"].squeeze() diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index 45dd109a..cdc27abd 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -82,6 +82,12 @@ costs_by_floor_area = epc_data[ ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", "HOT_WATER_COST_CURRENT"]].copy() +epc_data = epc_data[ + (epc_data["MAINHEAT_DESCRIPTION"].str.contains("SAP05:") == False) & + (~epc_data["LIGHTING_COST_CURRENT"].isin([None, ""])) & + (~pd.isnull(epc_data["LIGHTING_COST_CURRENT"])) + ] + costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in 
costs_by_floor_area.columns] for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] @@ -92,8 +98,8 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] -sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 10000).reset_index(drop=True) +sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2008-01-01"].drop_duplicates("UPRN").sample( + 50000).reset_index(drop=True) # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used @@ -163,6 +169,8 @@ mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_pred mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) # TODO: We might want to implement this generally, via an ETL process +for x in cleaned["mainheat-description"]: + x["has_wood_chips"] = False for p in input_properties: for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: if pd.isnull(p.data[col]): @@ -313,6 +321,10 @@ for p in tqdm(input_properties): if not recommendations.get(p.id): continue + # Temp allow to skip + if not isinstance(recommendations.get(p.id)[0], list): + continue + # we need to double unlist because we have a list of lists property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] @@ -336,32 +348,32 @@ for p in tqdm(input_properties): ) gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) - 
funding = Funding( - tenure=body.housing_type, - project_scores_matrix=project_scores_matrix, - partial_project_scores_matrix=partial_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=13, - eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=13, - eco4_private_solid_abs_rate=17, - gbis_social_cavity_abs_rate=21, - gbis_social_solid_abs_rate=25, - gbis_private_cavity_abs_rate=21, - gbis_private_solid_abs_rate=28, - ) - - li_thickness = convert_thickness_to_numeric( - p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] - ) - current_wall_u_value = p.walls["thermal_transmittance"] - if current_wall_u_value is None: - current_wall_u_value = get_wall_u_value( - clean_description=p.walls["clean_description"], - age_band=p.age_band, - is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], - is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], - ) + # funding = Funding( + # tenure=body.housing_type, + # project_scores_matrix=project_scores_matrix, + # partial_project_scores_matrix=partial_project_scores_matrix, + # whlg_eligible_postcodes=whlg_eligible_postcodes, + # eco4_social_cavity_abs_rate=13, + # eco4_social_solid_abs_rate=17, + # eco4_private_cavity_abs_rate=13, + # eco4_private_solid_abs_rate=17, + # gbis_social_cavity_abs_rate=21, + # gbis_social_solid_abs_rate=25, + # gbis_private_cavity_abs_rate=21, + # gbis_private_solid_abs_rate=28, + # ) + # + # li_thickness = convert_thickness_to_numeric( + # p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] + # ) + # current_wall_u_value = p.walls["thermal_transmittance"] + # if current_wall_u_value is None: + # current_wall_u_value = get_wall_u_value( + # clean_description=p.walls["clean_description"], + # age_band=p.age_band, + # is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], + # is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], + # ) # We insert the innovation uplift 
measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) @@ -369,35 +381,39 @@ for p in tqdm(input_properties): # TODO: Turn this into a function and store the innovaiton uplift for group in measures_to_optimise_with_uplift: for r in group: - - if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", - "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: - ( - r["partial_project_score"], - r["partial_project_funding"], - r["innovation_uplift"], - r["uplift_project_score"], - ) = ( - 0, 0, 0, 0 - ) - continue - - ( - r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"] - ) = funding.get_innovation_uplift( - measure=r, - starting_sap=int(p.data["current-energy-efficiency"]), - floor_area=p.floor_area, - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = ( + 0, 0, 0, 0 ) + # if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", + # "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: + # ( + # r["partial_project_score"], + # r["partial_project_funding"], + # r["innovation_uplift"], + # r["uplift_project_score"], + # ) = ( + # 0, 0, 0, 0 + # ) + # continue + # + # ( + # r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + # r["uplift_project_score"] + # ) = funding.get_innovation_uplift( + # measure=r, + # starting_sap=int(p.data["current-energy-efficiency"]), + # floor_area=p.floor_area, + # is_cavity=p.walls["is_cavity_wall"], + # current_wall_uvalue=current_wall_u_value, + # is_partial="partial" in 
p.walls["clean_description"].lower(), + # existing_li_thickness=li_thickness, + # mainheating=p.main_heating, + # main_fuel=p.main_fuel, + # mainheat_energy_eff=p.data["mainheat-energy-eff"], + # ) + if r["already_installed"]: # if already installed, we zero out the uplift and funding (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], @@ -411,7 +427,7 @@ for p in tqdm(input_properties): ) # When the goal is Increasing EPC, we can run the funding optimiser - if body.goal == "Increasing EPC": + if body.goal == "Switch off": solutions = optimise_with_funding_paths( p=p, @@ -481,37 +497,43 @@ for p in tqdm(input_properties): ROOF_INSULATION_MEASURES ) - funding.check_funding( - measures=solution, - starting_sap=int(p.data["current-energy-efficiency"]), - ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]), - floor_area=p.floor_area, - mainheat_description=p.main_heating["clean_description"], - heating_control_description=p.main_heating_controls["clean_description"], - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - has_wall_insulation_recommendation=has_wall_insulation_recommendation, - has_roof_insulation_recommendation=has_roof_insulation_recommendation, - ) + # funding.check_funding( + # measures=solution, + # starting_sap=int(p.data["current-energy-efficiency"]), + # ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]), + # floor_area=p.floor_area, + # mainheat_description=p.main_heating["clean_description"], + # heating_control_description=p.main_heating_controls["clean_description"], + # is_cavity=p.walls["is_cavity_wall"], + # current_wall_uvalue=current_wall_u_value, + # is_partial="partial" in 
p.walls["clean_description"].lower(), + # existing_li_thickness=li_thickness, + # mainheating=p.main_heating, + # main_fuel=p.main_fuel, + # mainheat_energy_eff=p.data["mainheat-energy-eff"], + # has_wall_insulation_recommendation=has_wall_insulation_recommendation, + # has_roof_insulation_recommendation=has_roof_insulation_recommendation, + # ) # Determine the scheme scheme = "none" - if funding.eco4_eligible: - scheme = "eco4" - if scheme == "none" and funding.gbis_eligible: - scheme = "gbis" + # if funding.eco4_eligible: + # scheme = "eco4" + # if scheme == "none" and funding.gbis_eligible: + # scheme = "gbis" - funded_measures = solution if scheme in ["gbis", "eco4"] else [] - project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs - total_uplift = funding.eco4_uplift - full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs - partial_project_score = funding.partial_project_abs - uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift + funded_measures = [] + # funded_measures = solution if scheme in ["gbis", "eco4"] else [] + # project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs + project_funding = 0 + # total_uplift = funding.eco4_uplift + total_uplift = 0 + # full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs + full_project_score = 0 + # partial_project_score = funding.partial_project_abs + partial_project_score = 0 + # uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift + uplift_project_score = 0 selected = {r["id"] for r in solution} diff --git a/etl/customers/lincs_rural/get_missed.py b/etl/customers/lincs_rural/get_missed.py new file mode 100644 index 00000000..d25449c5 --- /dev/null +++ b/etl/customers/lincs_rural/get_missed.py @@ -0,0 +1,47 @@ +# After going back to Lincs rural, they gave us some additional data that we can use to try to 
fetch missed UPRNs again +import pandas as pd + +# missed = pd.read_excel( +# "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx", +# sheet_name="Missed Properties" +# ) +# missed = missed[~pd.isnull(missed["rrn"])] + +prepared = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx", + sheet_name="Standardised Asset List" +) + +updated_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes - Copy.xlsx", + sheet_name="PROPERTY EPC RATINGS" +) +updated_data = updated_data[~pd.isnull(updated_data["Property Ref."])] + +missed = updated_data[~updated_data["Property Ref."].isin(prepared["landlord_property_id"].values.tolist())].copy() +# missed.to_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv") +# We'll grab the UPRNs manually and then pull them in, and prepare for ARA + +missing_uprns = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv") + +missing_uprns["landlord_property_id"] = missing_uprns["Property Ref."].copy() +missing_uprns["domna_property_id"] = missing_uprns["Property Ref."].copy() +missing_uprns["domna_address_1"] = missing_uprns['Unnamed: 1'].str.split(",").str[0].str.strip() +missing_uprns["postcode"] = missing_uprns['Unnamed: 1'].str.split(",").str[-1].str.strip() +missing_uprns["landlord_property_type"] = "unknown" +missing_uprns["landlord_built_form"] = "unknown" +missing_uprns["domna_full_address"] = missing_uprns['Unnamed: 1'].copy() + +missed_standardised_for_ara = missing_uprns[ + ['landlord_property_id', 'domna_address_1', 'landlord_property_type', 'landlord_built_form', 'postcode', + 'domna_property_id', 'UPRN'] +].rename( + columns={"UPRN": "epc_os_uprn"} +) + +# Store +missed_standardised_for_ara.to_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_standardised_ara_nov_2025.xlsx", + index=False, + sheet_name="Standardised Asset List" +) diff --git 
a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/a_data_prep.py similarity index 100% rename from etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py rename to etl/customers/peabody/Nov 2025 Consulting Project/a_data_prep.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py b/etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py new file mode 100644 index 00000000..13faa371 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py @@ -0,0 +1,147 @@ +""" +We have found, within the Peabody data, a large volume of properties with missing and incorrects +UPRNS and incorrect address data. We want to flag these records and also find missings where we can + +We also have duplicate UPRNS that should be flagged +""" +import json +import time +import os +import pandas as pd +import numpy as np +from tqdm import tqdm +from dotenv import load_dotenv +from asset_list.utils import get_data_for_property +from utils.logger import setup_logger +from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet + +logger = setup_logger() + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) +property_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) + +missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy() + +# Any non-numeric UPRNS or leading with 0s are invalid +non_numeric_uprns = sustainability_data[ + 
~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN']) + ].copy() +# 70 properties +leading_zero_uprns = sustainability_data[ + sustainability_data['UPRN'].astype(str).str.startswith('0') +].copy() + +# Flag duplicates +duplicate_uprns = sustainability_data[ + sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN']) + ].copy() + +# Store this data +# missing_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/missing_uprns.csv", index=False) +# non_numeric_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/non_numeric_uprns.csv", index=False) +# leading_zero_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/leading_zero_uprns.csv", index=False) +# duplicate_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/duplicate_uprns.csv", index=False) + +# Take everything remaining +data_needing_validation = sustainability_data[ + ~sustainability_data["Org Ref"].isin( + missing_uprns["Org Ref"].values.tolist() + non_numeric_uprns["Org Ref"].values.tolist() + + leading_zero_uprns["Org Ref"].values.tolist() + duplicate_uprns["Org Ref"].values.tolist() + ) +].copy() + +# TODO: We should build a SAL for UPRNS that are missing, invalid or duplicated + +# We check UPRN validity against our OS data +uprn_filenames = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet" +) + +# We're going to: +# 1) Grab a filename +# 2) Read it in +# 3) Check which UPRNS from our data are in that file +# 4) Keep a record of which UPRNS were found where + +for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)): + spatial_data = 
read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}" + ) + + uprns_in_file = data_needing_validation[ + data_needing_validation['UPRN'].astype('Int64').isin(spatial_data['UPRN'].astype('Int64').values) + ].copy() + + print("Found {} UPRNS in file {}".format(len(uprns_in_file), uprn_file)) + if len(uprns_in_file) > 0: + # Store the found UPRNS in the validation cache + data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy() + data_to_store["Source File"] = uprn_file + # Store + data_to_store.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + f"Project/data_validation/validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv", + index=False + ) + +# Get all of the files: +storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/validation_cache") +# List contents +folder_contents = os.listdir(storage_locations) +# Grab files and concatenate +all_found_uprns = [] +for file in folder_contents: + if file.endswith("_found_uprns.csv"): + df = pd.read_csv(os.path.join(storage_locations, file)) + all_found_uprns.append(df) + +all_found_uprns = pd.concat(all_found_uprns) + +# We now flag any UPRNS that were not found in any of the OS datasets +os_missed_uprns = data_needing_validation[ + ~data_needing_validation['Org Ref'].isin(all_found_uprns['Org Ref'].values.tolist()) +].copy() + +# store +os_missed_uprns.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/os_missed_uprns.csv", + index=False +) + +# Now build a larger table for standardisation +to_standardised = pd.concat( + [missing_uprns, non_numeric_uprns, leading_zero_uprns, duplicate_uprns, os_missed_uprns] +) + +to_standardised.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + 
"Project/data_validation/to_standardise_uprns.xlsx", + index=False) + +# We prepare a finalised dataset to work with, that excludes all problematic properties and leaves us with +# properties for which we have the data we need + +finalised_data = sustainability_data[ + ~sustainability_data["Org Ref"].isin( + to_standardised["Org Ref"].values.tolist() + ) +].copy() + +# Prepare with the column formats we need, as analogous to a_data_prep where we defined an initial working sample diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py new file mode 100644 index 00000000..2868bce5 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py @@ -0,0 +1,114 @@ +import pandas as pd + +# import pandas as pd +# +# sal = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " +# "Project/data_validation/to_standardise_uprns - Standardised.xlsx", +# sheet_name="Standardised Asset List" +# ) +# +# # Quick breadown of missingness +# missing = sal[ +# pd.isnull(sal["estimated"]) | (sal["estimated"] == True) | pd.isnull(sal["epc_os_uprn"]) +# ] +# +# fetched = sal[(sal["estimated"] == False) | ~pd.isnull(sal["epc_os_uprn"])].copy() +# fetched = fetched[ +# ["landlord_property_id", "domna_address_1", "domna_postcode", "domna_full_address", "epc_address1", +# "epc_postcode", "epc_address", "landlord_property_type", "epc_property_type"] +# ] +# +# known_issues = [ +# +# ] +# +# # Missed postcodes +# missed_postcode_agg = missing.groupby("domna_postcode").size().reset_index(name="count") +# missed_postcode_agg = missed_postcode_agg.sort_values("count", ascending=False) +# +# multi_missed_postcode = missed_postcode_agg[missed_postcode_agg["count"] > 1] + +### Prepare +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 
Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# Data we want to remove: +missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy() + +# Any non-numeric UPRNS or leading with 0s are invalid +non_numeric_uprns = sustainability_data[ + ~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN']) + ].copy() +# 70 properties +leading_zero_uprns = sustainability_data[ + sustainability_data['UPRN'].astype(str).str.startswith('0') +].copy() + +# Flag duplicates +duplicate_uprns = sustainability_data[ + sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN']) + ].copy() + +# Read in the UPRNs that were not valid based on the OS data +os_missed_uprns = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/os_missed_uprns.csv", +) + +modelling_data = sustainability_data[ + ~sustainability_data["Org Ref"].isin( + missing_uprns["Org Ref"].unique().tolist() + non_numeric_uprns["Org Ref"].unique().tolist() + + leading_zero_uprns["Org Ref"].unique().tolist() + duplicate_uprns["Org Ref"].unique().tolist() + + os_missed_uprns["Org Ref"].unique().tolist() + ) +].copy() + +# Need to prepare for upload +# Variables: + + +modelling_data["landlord_property_id"] = sustainability_data["Org Ref"].copy() +modelling_data["domna_property_id"] = sustainability_data["Org Ref"].copy() + +modelling_data = modelling_data.rename( + { + "Address 1": "domna_address_1", + "Postcode": "postcode", + "Type": "landlord_property_type", + "Attachment": "landlord_built_form", + "Heating": "landlord_heating_system", + "UPRN": "epc_os_uprn" + } +) + +modelling_data = modelling_data[ + [ + "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type", + "landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area 
(m2)", + "domna_property_id", "domna_full_address" + ] +] + +modelling_data["landlord_built_form"] = modelling_data["landlord_built_form"].map( + { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "SemiDetached": "Semi-Detached", + "Detached": "Detached", + "EnclosedEndTerrace": "Enclosed End-Terrace", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + } +) + + +def make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + +modelling_data["domna_full_address"] = modelling_data.apply(lambda x: make_full_address(x), axis=1) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py deleted file mode 100644 index a1be533d..00000000 --- a/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -We have found, within the Peabody data, a large volume of properties with missing and incorrects -UPRNS and incorrect address data. 
We want to flag these records and also find missings where we can - -We also have duplicate UPRNS that should be flagged -""" diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 8fa3e13c..74dcfc56 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -844,7 +844,7 @@ class TrainingDataset(BaseDataset): # Make sure they are all efficiency columns if any(~missings.index.str.contains("energy_eff")): - raise ValueError("Non efficiency columns are missing") + raise ValueError(f"Non efficiency columns are missing {missings.index}") for m in missings.index: self.df[m] = self.df[m].fillna("NO_RATING") diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py index 4c0443f1..9d15e019 100644 --- a/etl/webscrape/Zoopla.py +++ b/etl/webscrape/Zoopla.py @@ -15,25 +15,10 @@ os.makedirs(CACHE_DIR, exist_ok=True) def random_delay(): - """Pause randomly between requests (0.5–2 s).""" time.sleep(random.uniform(0.5, 2)) -def extract_feature(soup, icon_id): - tag = soup.find("use", href=f"#{icon_id}") - if tag: - parent = tag.find_parent("div", class_="_1pbf8i53") - if parent: - text = parent.get_text(strip=True) - return text - return None - - def extract_embedded_json(text): - """ - Extract embedded property JSON containing attributes, energy, estimates, and sales history. 
- """ - # Try to grab everything after "attributes" match = re.search( r'"attributes"\s*:\s*\{.*?\}\s*,.*?"historicSales".*?\]', text, @@ -48,13 +33,16 @@ def extract_embedded_json(text): except json.JSONDecodeError: pass - # fallback for independent keys result = {} for key in [ "attributes", "energy", "rentEstimate", "saleEstimate", "saleHistory", "historicSales" ]: - key_match = re.search(rf'"{key}"\s*:\s*(\{{.*?\}}|\[.*?\])', text, re.DOTALL) + key_match = re.search( + rf'"{key}"\s*:\s*(\{{.*?\}}|\[.*?\])', + text, + re.DOTALL + ) if key_match: try: result[key] = json.loads(key_match.group(1)) @@ -64,28 +52,23 @@ def extract_embedded_json(text): def scrape_all_estimates(session, url): - """Scrape valuation estimates for one Zoopla property URL.""" resp = session.get(url, impersonate=random.choice(ENGINES)) html = resp.text - page_source = BeautifulSoup(resp.text, "html.parser") - estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) - + soup = BeautifulSoup(html, "html.parser") + estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) data = extract_embedded_json(html) - is_blocked = len(estimates) == 0 - return { "estimates": estimates, - "is_blocked": is_blocked, + "is_blocked": len(estimates) == 0, "response_html": html, - "attributes": data.get("attributes"), - "rent": data.get("rentEstimate"), - "historicSales": data.get("historicSales"), + "attributes": data.get("attributes", {}), + "rentEstimate": data.get("rentEstimate", {}), + "historicSales": data.get("historicSales", []), } def extract_estimates(estimates): - """Extract low, mid, and high estimates from parsed HTML.""" est = estimates[0] low = est.find("span", {"data-testid": "low-estimate-blurred"}).text mid = est.find("p", {"data-testid": "estimate-blurred"}).text @@ -94,110 +77,123 @@ def extract_estimates(estimates): def cache_path_for_url(url): - """Return a deterministic local cache path for a URL.""" uprn = url.split("/")[-2] return os.path.join(CACHE_DIR, 
f"{uprn}.html") +def parse_cached_html(url, html): + soup = BeautifulSoup(html, "html.parser") + estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) + data = extract_embedded_json(html) + history = data.get("historicSales") or [{}] + + if not estimates: + return None + + low, mid, high = extract_estimates(estimates) + + return { + "URL": url, + "Low Estimate": low, + "Middle Estimate": mid, + "High Estimate": high, + **data.get("attributes", {}), + **data.get("rentEstimate", {}), + **history[0], + } + + def parallel_task(url): - """Main worker function executed in each process.""" cache_path = cache_path_for_url(url) - # Use cached file if it exists if os.path.exists(cache_path): - html = open(cache_path, "r").read() - page_source = BeautifulSoup(html, "html.parser") - estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) - data = extract_embedded_json(html) - history_sales = data.get("historicSales", [{}]) - if len(history_sales) == 0: - history_sales = [{}] + with open(cache_path, "r", encoding="utf-8") as f: + html = f.read() + cached = parse_cached_html(url, html) + if cached: + return cached - if estimates: - low, mid, high = extract_estimates(estimates) - return { - "URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high, - **data.get("attributes", {}), **data.get("rentEstimate", {}), - **history_sales[0] - } - - # Otherwise scrape live with StealthSession() as session: - attempts = 0 - while attempts < 5: + for attempt in range(5): output = scrape_all_estimates(session, url) + if not output["is_blocked"] and output["estimates"]: - open(cache_path, "w").write(output["html"]) + html = output.get("response_html") + if html: + with open(cache_path, "w", encoding="utf-8") as f: + f.write(html) + + history = output.get("historicSales") or [{}] low, mid, high = extract_estimates(output["estimates"]) - history_sales = output.get("historicSales", [{}]) - if len(history_sales) == 0: - history_sales = [{}] + 
return { - "URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high, + "URL": url, + "Low Estimate": low, + "Middle Estimate": mid, + "High Estimate": high, **output.get("attributes", {}), - **output.get("rent", {}), - **history_sales[0] + **output.get("rentEstimate", {}), + **history[0], } - attempts += 1 - print(f"[Attempt {attempts}] Blocked or empty for {url}") + random_delay() - # If still blocked, return placeholders - return {"URL": url, "Low Estimate": None, "Middle Estimate": None, "High Estimate": None} + return { + "URL": url, + "Low Estimate": None, + "Middle Estimate": None, + "High Estimate": None, + } def parse_price(p): - if p is None: + if not p: return None p = p.replace("£", "").strip().lower() - if not p: - return None if p.endswith("k"): return float(p[:-1]) * 1_000 - elif p.endswith("m"): + if p.endswith("m"): return float(p[:-1]) * 1_000_000 - else: - try: - return float(p.replace(",", "")) - except ValueError: - return None + + try: + return float(p.replace(",", "")) + except ValueError: + return None if __name__ == "__main__": - # Load portfolio asset_list = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio/22.10 AL Portfolio - " - "Standardised - partial UPRN fill.xlsx", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/modelling_sample.xlsx", sheet_name="Standardised Asset List" ) + asset_list = asset_list[~pd.isnull(asset_list["epc_os_uprn"])] + asset_list = asset_list.drop_duplicates("epc_os_uprn") asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str) + uprns = asset_list["epc_os_uprn"].tolist() urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns] - # Limit concurrency to avoid blocks - with Pool(processes=2) as pool: # fewer processes = fewer fingerprints + with Pool(processes=2) as pool: estimates_list = list( tqdm(pool.imap(parallel_task, urls), total=len(urls)) ) 
df = pd.DataFrame(estimates_list) - - print(df.head()) - df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/") df["valuation"] = df["Middle Estimate"].apply(parse_price) df.to_csv("zoopla_estimates.csv", index=False) - # Merge with asset list merged = asset_list.merge( df[["uprn", "valuation"]], left_on="epc_os_uprn", right_on="uprn", how="left" ) + merged.to_excel( "20251029 AL Portfolio - Standardised - with valuations.xlsx", index=False diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index f6618f22..d05275ea 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -11,8 +11,8 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 388 -SCENARIOS = [803] +PORTFOLIO_ID = 404 +SCENARIOS = [829] def get_data(portfolio_id, scenario_ids): @@ -121,7 +121,8 @@ recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures df = properties_df[ [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", + "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", + "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", ] @@ -143,7 +144,7 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3 # asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') asset_list = read_excel_from_s3( - bucket_name="retrofit-plan-inputs-dev", file_key='2/388/20251208T203603925Z/asset_list.xlsx', + bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx", header_row=0, sheet_name="Standardised Asset List" ) asset_list = pd.DataFrame(asset_list) @@ -201,11 +202,15 @@ asset_list = asset_list.merge( ) # For exporting -asset_list.to_excel( - 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/20251209_sample_package_data.xlsx", +df.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - " + "with ID.xlsx", index=False ) +# asset_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx", +# index=False +# ) condition_costs = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx", From d65fc22ad4e8c78bf25b694875638cb933f1e915 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Dec 2025 23:21:25 +0800 Subject: [PATCH 121/202] fixed sap05 downgrade --- backend/Property.py | 2 +- .../c_finalised_modelling_data.py | 28 +++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 31991702..9a4f8d97 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -843,7 +843,7 @@ class Property: raise ValueError("Current energy bill has not been set") # IF we have a SAP05 overwrite, we pull out the relevant information - sap_05_overwritten = self.data.get("sap_05_overwritten", False) + sap_05_overwritten = self.data.get("sap-05-overwritten", False) sap_05_score, sap_05_epc_rating = None, None if sap_05_overwritten: diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py index 2868bce5..b2dfb01e 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py @@ -75,7 +75,7 @@ modelling_data["landlord_property_id"] = sustainability_data["Org Ref"].copy() modelling_data["domna_property_id"] = sustainability_data["Org Ref"].copy() modelling_data = modelling_data.rename( - { + columns={ "Address 1": 
"domna_address_1", "Postcode": "postcode", "Type": "landlord_property_type", @@ -85,6 +85,15 @@ modelling_data = modelling_data.rename( } ) + +def make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + +modelling_data["domna_full_address"] = modelling_data.apply(lambda x: make_full_address(x), axis=1) + modelling_data = modelling_data[ [ "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type", @@ -104,11 +113,12 @@ modelling_data["landlord_built_form"] = modelling_data["landlord_built_form"].ma } ) - -def make_full_address(x): - to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] - to_join = [x for x in to_join if not pd.isnull(x) and x != ''] - return ", ".join(to_join) - - -modelling_data["domna_full_address"] = modelling_data.apply(lambda x: make_full_address(x), axis=1) +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx") +with pd.ExcelWriter(filename) as writer: + modelling_data.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Store the three sections + modelling_data[0:30000].to_excel(writer, sheet_name="Part 1", index=False) + modelling_data[30000:60000].to_excel(writer, sheet_name="Part 2", index=False) + modelling_data[60000:].to_excel(writer, sheet_name="Part 3", index=False) + modelling_data.sample(60).to_excel(writer, sheet_name="Random testing sample", index=False) From 33057b855fbdf1835069afc83a273ee30ec529c3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 07:51:32 +0800 Subject: [PATCH 122/202] added missing fuels --- backend/Property.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/Property.py b/backend/Property.py index 9a4f8d97..cee82836 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1238,6 +1238,9 @@ class Property: 
"electricity": "Electricity", "biogas": "Smokeless Fuel", "heat network": "Natural Gas (Community Scheme)", + "lpg": 'LPG', + "biodiesel": "Smokeless Fuel", + "b30d": "B30K Biofuel" } self.heating_energy_source = list({ From 00c14e7493272d7eabd020bf32971973d578b766 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 19:40:31 +0800 Subject: [PATCH 123/202] Handle where u-value is 0, instead of none --- recommendations/RoofRecommendations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 0324c9cb..1e5636ff 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -150,7 +150,7 @@ class RoofRecommendations: return # If we have a u-value and we don't have a non-invasive recommendation, we can't recommend anything - if u_value and not any( + if (u_value is not None) and not any( x in MEASURE_MAP["roof_insulation"] for x in [r["type"] for r in self.property.non_invasive_recommendations] ): # We don't have enough information to provide a recommendation From 2a33a664702b7db8b5b28469639f1190c251e5dc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 21:54:50 +0800 Subject: [PATCH 124/202] refactoring db calls to be short lived --- backend/app/db/connection.py | 12 +- backend/app/plan/utils.py | 3 +- backend/engine/engine.py | 369 +++++++++++++++++++---------------- 3 files changed, 212 insertions(+), 172 deletions(-) diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index 2ac9bd02..bff63ae1 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -12,7 +12,17 @@ db_string = connection_string.format( dbname=get_settings().DB_NAME, ) -db_engine = create_engine(db_string, pool_size=5, max_overflow=5) +# db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + +# Adjusted database connection to decease pool size for serverless 
environments (from lambda) so that +# each lambda doesn't hog all connections +db_engine = create_engine( + db_string, + pool_size=1, + max_overflow=0, # Limit the number of extra connections. With this and pool size, we allow 1 connection per lambda + pool_pre_ping=True, + pool_recycle=300, # Forces SQLAlchemy to close and reopen any connection older than 300 seconds +) def get_db_session(): diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index ebf1dd9c..717638cf 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -238,7 +238,7 @@ def build_cloudwatch_log_url(start_ms: int) -> str: ) -def handle_error(session, msg, e, subtask_id, status=500, start_ms=None): +def handle_error(msg, e, subtask_id, status=500, start_ms=None): # When the pipeline fails, handles error process cloud_logs_url = build_cloudwatch_log_url(start_ms) @@ -249,5 +249,4 @@ def handle_error(session, msg, e, subtask_id, status=500, start_ms=None): cloud_logs_url=cloud_logs_url ) logger.error(msg, exc_info=True) - session.rollback() return Response(status_code=status, content=msg) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 967d6b16..2dd08a1c 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -10,6 +10,8 @@ from uuid import UUID from backend.Funding import Funding from backend.SearchEpc import SearchEpc +from contextlib import contextmanager +from sqlmodel import Session from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError @@ -516,16 +518,60 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): return prepared_epc +def extract_address_data(config, body): + """ + Simple helper to grab address data from the config + :return: + """ + uprn = config.get("uprn", None) + if pd.isnull(uprn): + uprn = None + if uprn: + uprn = int(float(uprn)) + + address1 = config.get("address", None) + # Handle domna address list format + if pd.isnull(address1) and 
body.file_format == "domna_asset_list": + address1 = config.get("domna_address_1", None) + + address1 = str(int(address1)) if isinstance(address1, float) else str(address1) + full_address = config.get("domna_full_address", "") if body.file_format == "domna_asset_list" else None + if not isinstance(full_address, str): # Catch for when the full address is nan + full_address = None + + return uprn, address1, full_address + + +@contextmanager +def db_session(): + session = Session(db_engine) + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + +@contextmanager +def db_read_session(): + session = Session(db_engine, expire_on_commit=False) + try: + yield session + finally: + session.close() + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) logger.info("Connecting to db") - session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() start_ms = int(time.time() * 1000) try: - session.begin() logger.info("Getting the inputs") if body.file_type == "xlsx": @@ -641,40 +687,32 @@ async def model_engine(body: PlanTriggerRequest): input_properties, inspections_map, eco_packages = [], {}, {} for config in tqdm(plan_input): - # We validate each record in the file. 
If the record is NOT valid, we need to handle this accordingly - uprn = config.get("uprn", None) - if pd.isnull(uprn): - uprn = None - if uprn: - uprn = int(float(uprn)) - - epc_api_data, epc_page, rrn, epc_cache = None, None, None, {} - if uprn: - # if we have a UPRN, we check if we already have EPC data associated with this UPRN - epc_cache = db_funcs.epc_functions.EpcStoreService.get_epc_for_uprn(session, uprn) - - if epc_cache["status"] == db_funcs.epc_functions.EpcStoreService.FRESH: - epc_api_data, epc_page, rrn = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] - - address1 = config.get("address", None) - # Handle domna address list format - if pd.isnull(address1) and body.file_format == "domna_asset_list": - address1 = config.get("domna_address_1", None) - - address1 = str(int(address1)) if isinstance(address1, float) else str(address1) - full_address = config.get("domna_full_address", "") if body.file_format == "domna_asset_list" else None - if not isinstance(full_address, str): # Catch for when the full address is nan - full_address = None + uprn, address1, full_address = extract_address_data(config, body) heating_system = parse_heating_system(config) - associated_uprns = [] - if (body.event_type == "remote_assessment") and config.get("property_type") == "Flat": - # We're running a remote assessment for a flat - we go and grab the associated - # UPRNS for other units in the same building - associated_uprns = db_funcs.address_functions.get_associated_uprns( - session, postcode=config["postcode"], uprn=uprn + # ---------- 1) fetch data ---------- + epc_api_data, epc_page, rrn, epc_cache = None, None, None, {} + with db_read_session() as session: + epc_cache = {} + if uprn: + epc_cache = db_funcs.epc_functions.EpcStoreService.get_epc_for_uprn(session, uprn) + + # For remote assessments of flats, we get associated UPRNs + associated_uprns = [] + if body.event_type == "remote_assessment" and config.get("property_type") == "Flat": + 
associated_uprns = db_funcs.address_functions.get_associated_uprns( + session, postcode=config["postcode"], uprn=uprn + ) + + # We check for an energy assessment we have performed on this property: + energy_assessment = db_funcs.energy_assessment_functions.get_latest_assessment_by_uprn( + session, uprn ) + # Extract from EPC cache + if epc_cache.get("status") == db_funcs.epc_functions.EpcStoreService.FRESH: + epc_api_data, epc_page, rrn = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] + epc_searcher = SearchEpc( address1=address1, postcode=config["postcode"], @@ -692,28 +730,25 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) epc_searcher.set_uprn_source(file_format=body.file_format) - # We check for an energy assessment we have performed on this property: - energy_assessment = db_funcs.energy_assessment_functions.get_latest_assessment_by_uprn( - session, uprn if uprn is not None else epc_searcher.uprn - ) + # ---------- 2) ensure property exists ---------- + with db_session() as session: + property_id, is_new = db_funcs.property_functions.ensure_property_exists( + session, body, epc_searcher, energy_assessment, + landlord_property_id=config.get("landlord_property_id") + ) - property_id, is_new = db_funcs.property_functions.ensure_property_exists( - session, body, epc_searcher, energy_assessment, landlord_property_id=config.get("landlord_property_id") - ) - if not property_id: - continue - - if not is_new and not body.multi_plan: + if not property_id or (not is_new and not body.multi_plan): continue if is_new: - db_funcs.property_functions.create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + with db_session() as session: + db_funcs.property_functions.create_property_targets( + session, + property_id=property_id, + portfolio_id=body.portfolio_id, + 
epc_target=body.goal_value, + heat_demand_target=None + ) # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. # Otherwise, we use the newest EPC @@ -789,18 +824,18 @@ async def model_engine(body: PlanTriggerRequest): # 2) A real EPC # 3) A UPRN (meaning that a UPRN could be fetched against that property) # We store this data - - if db_funcs.epc_functions.EpcStoreService.check_insert_needed( - epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn - ): - # We store the EPC data we have found for this property - db_funcs.epc_functions.EpcStoreService.upsert_epc_data( - session=session, - uprn=epc_searcher.uprn, - epc_api=epc_searcher.data, - epc_page=epc_page_source.get("page_source"), - epc_page_rrn=epc_page_source.get("rrn"), - ) + with db_session() as session: + if db_funcs.epc_functions.EpcStoreService.check_insert_needed( + epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn + ): + # We store the EPC data we have found for this property + db_funcs.epc_functions.EpcStoreService.upsert_epc_data( + session=session, + uprn=epc_searcher.uprn, + epc_api=epc_searcher.data, + epc_page=epc_page_source.get("page_source"), + epc_page_rrn=epc_page_source.get("rrn"), + ) if not input_properties: return Response(status_code=204) @@ -811,7 +846,8 @@ async def model_engine(body: PlanTriggerRequest): # aginst each property if if inspections_map: logger.info("Inserting inspections data") - db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) + with db_session() as session: + db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) # Set up model api and warm up the lambdas model_api = ModelApi( @@ -828,7 +864,8 @@ async def model_engine(body: PlanTriggerRequest): # consistent requests to the backend for # the same data logger.info("Reading in materials and cleaned datasets") - materials = db_funcs.materials_functions.get_materials(session) 
+ with db_read_session() as session: + materials = db_funcs.materials_functions.get_materials(session) cleaned = get_cleaned() project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() @@ -861,24 +898,24 @@ async def model_engine(body: PlanTriggerRequest): ofgem_consumption_averages=ofgem_consumption_averages, body=body ) - - input_properties = GoogleSolarApi.building_solar_analysis( - building_solar_config=building_solar_config, - input_properties=input_properties, - session=session, - google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, - solar_materials=[m for m in materials if m["type"] == "solar_pv"], - ) - - input_properties = GoogleSolarApi.unit_solar_analysis( - unit_solar_config=unit_solar_config, - input_properties=input_properties, - session=session, - body=body, - solar_materials=[m for m in materials if m["type"] == "solar_pv"], - google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, - inspections_map=inspections_map - ) + with db_session() as session: + input_properties = GoogleSolarApi.building_solar_analysis( + building_solar_config=building_solar_config, + input_properties=input_properties, + session=session, + google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, + solar_materials=[m for m in materials if m["type"] == "solar_pv"], + ) + with db_session() as session: + input_properties = GoogleSolarApi.unit_solar_analysis( + unit_solar_config=unit_solar_config, + input_properties=input_properties, + session=session, + body=body, + solar_materials=[m for m in materials if m["type"] == "solar_pv"], + google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, + inspections_map=inspections_map + ) # We also make a tweak - if the property has been flagged for solar but doesn't contain # any panel performance, we ensure that we have a 3kWp and 4kWp option for the property @@ -1272,92 +1309,90 @@ async def model_engine(body: PlanTriggerRequest): # We don't need to create a new scenario, we just use the 
existing one scenario_id = body.scenario_id else: - engine_scenario = db_funcs.recommendations_functions.create_scenario( - session=session, - scenario={ - "name": body.scenario_name, - "created_at": created_at, - "budget": body.budget, - "portfolio_id": body.portfolio_id, - "housing_type": body.housing_type, - "goal": body.goal, - "goal_value": body.goal_value, - "trigger_file_path": body.trigger_file_path, - "already_installed_file_path": body.already_installed_file_path, - "patches_file_path": body.patches_file_path, - "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, - "exclusions": body.exclusions, - "multi_plan": body.multi_plan - } - ) + with db_session() as session: + engine_scenario = db_funcs.recommendations_functions.create_scenario( + session=session, + scenario={ + "name": body.scenario_name, + "created_at": created_at, + "budget": body.budget, + "portfolio_id": body.portfolio_id, + "housing_type": body.housing_type, + "goal": body.goal, + "goal_value": body.goal_value, + "trigger_file_path": body.trigger_file_path, + "already_installed_file_path": body.already_installed_file_path, + "patches_file_path": body.patches_file_path, + "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, + "exclusions": body.exclusions, + "multi_plan": body.multi_plan + } + ) scenario_id = engine_scenario.id - property_valuation_increases = [] - session.commit() + # property_valuation_increases = [] new_epc_bands = {} - property_value_increase_ranges = {} + # property_value_increase_ranges = {} for i in range(0, len(input_properties), BATCH_SIZE): try: # Take a slice of the input_properties list to make a batch batch_properties = input_properties[i:i + BATCH_SIZE] + with db_session() as session: + for p in batch_properties: + recommendations_to_upload = recommendations.get(p.id, []) + default_recommendations = [r for r in recommendations_to_upload if r["default"]] + total_sap_points = sum([r["sap_points"] 
for r in default_recommendations]) + new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points + new_epc = sap_to_epc(new_sap_points) + new_epc_bands[p.id] = new_epc - for p in batch_properties: - recommendations_to_upload = recommendations.get(p.id, []) - default_recommendations = [r for r in recommendations_to_upload if r["default"]] - total_sap_points = sum([r["sap_points"] for r in default_recommendations]) - new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points - new_epc = sap_to_epc(new_sap_points) - new_epc_bands[p.id] = new_epc + total_cost = sum([r["total"] for r in default_recommendations]) - total_cost = sum([r["total"] for r in default_recommendations]) + valuations = PropertyValuation.estimate( + property_instance=p, target_epc=new_epc, total_cost=total_cost + ) + # property_value_increase_ranges[p.id] = valuations - valuations = PropertyValuation.estimate( - property_instance=p, target_epc=new_epc, total_cost=total_cost - ) - property_value_increase_ranges[p.id] = valuations + property_plan_data = db_funcs.recommendations_functions.prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, + default_recommendations + ) - property_plan_data = db_funcs.recommendations_functions.prepare_plan_data( - p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations - ) - - # TODO - this is not right, especially if the existing run failed - if p.is_new: property_details_epc = p.get_property_details_epc( portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, ) + property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) db_funcs.property_functions.create_property_details_epc(session, property_details_epc) db_funcs.property_functions.update_or_create_property_spatial_details( session, p.uprn, p.spatial ) - property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - 
db_funcs.property_functions.update_property_data( session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data ) - if not recommendations_to_upload: - continue + if not recommendations_to_upload: + continue - new_plan_id = db_funcs.recommendations_functions.create_plan(session, plan=property_plan_data) - - db_funcs.recommendations_functions.upload_recommendations( - session, recommendations_to_upload, p.id, new_plan_id - ) - db_funcs.funding_functions.upload_funding(session, p, new_plan_id, recommendations_to_upload) - - if valuations["current_value"] > 0: - property_valuation_increases.append( - valuations["average_increased_value"] - valuations["current_value"] + new_plan_id = db_funcs.recommendations_functions.create_plan( + session, plan=property_plan_data ) - # Commit the session after each batch - session.commit() + db_funcs.recommendations_functions.upload_recommendations( + session, recommendations_to_upload, p.id, new_plan_id + ) + db_funcs.funding_functions.upload_funding( + session, p, new_plan_id, recommendations_to_upload + ) + + # if valuations["current_value"] > 0: + # property_valuation_increases.append( + # valuations["average_increased_value"] - valuations["current_value"] + # ) except Exception as e: # Rollback the session if an error occurs - session.rollback() logger.warning("Failed i = %s" % str(i)) logger.error(f"An error occurred during batch starting at index {i}: {e}") logger.error(f"property is uprn {p.uprn} id {p.id} address {p.address}") @@ -1369,42 +1404,38 @@ async def model_engine(body: PlanTriggerRequest): # recommendation from being default to not default, we'll need to re-run this process to re-calculate the # the portfolion level impact - total_valuation_increase = sum(property_valuation_increases) - labour_days = round(max( - [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()] - )) + # total_valuation_increase = sum(property_valuation_increases) + # 
labour_days = round(max( + # [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()] + # )) + # + # # TODO - This code only pulls in the properties that have been updated in this run, but we need to + # # aggregate all properties in the portfolio. We likely need to trigger a re-aggregation + # aggregated_data = extract_portfolio_aggregation_data( + # input_properties=input_properties, + # total_valuation_increase=total_valuation_increase, + # recommendations=recommendations, + # new_epc_bands=new_epc_bands, + # property_value_increase_ranges=property_value_increase_ranges + # ) + # with db_session() as session: + # db_funcs.portfolio_functions.aggregate_portfolio_recommendations( + # session, + # portfolio_id=body.portfolio_id, + # scenario_id=scenario_id, + # total_valuation_increase=total_valuation_increase, + # labour_days=labour_days, + # aggregated_data=aggregated_data + # ) - # TODO - This code only pulls in the properties that have been updated in this run, but we need to - # aggregate all properties in the portfolio. 
We likely need to trigger a re-aggregation - aggregated_data = extract_portfolio_aggregation_data( - input_properties=input_properties, - total_valuation_increase=total_valuation_increase, - recommendations=recommendations, - new_epc_bands=new_epc_bands, - property_value_increase_ranges=property_value_increase_ranges - ) - - db_funcs.portfolio_functions.aggregate_portfolio_recommendations( - session, - portfolio_id=body.portfolio_id, - scenario_id=scenario_id, - total_valuation_increase=total_valuation_increase, - labour_days=labour_days, - aggregated_data=aggregated_data - ) - - # Commit final changes - session.commit() except IntegrityError as e: - return handle_error(session, "Database integrity error.", e, body.subtask_id, 500, start_ms) + return handle_error("Database integrity error.", e, body.subtask_id, 500, start_ms) except OperationalError as e: - return handle_error(session, "Database operational error.", e, body.subtask_id, 500, start_ms) + return handle_error("Database operational error.", e, body.subtask_id, 500, start_ms) except ValueError as e: - return handle_error(session, "Bad request: malformed data.", e, body.subtask_id, 400, start_ms) + return handle_error("Bad request: malformed data.", e, body.subtask_id, 400, start_ms) except Exception as e: # General exception handling - return handle_error(session, "An unexpected error occurred.", e, body.subtask_id, 500, start_ms) - finally: - session.close() + return handle_error("An unexpected error occurred.", e, body.subtask_id, 500, start_ms) cloud_logs_url = build_cloudwatch_log_url(start_ms) # Mark the subtask as successful From bf7f05108db9568f3a4d41858ffbaa66db45600b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 22:06:35 +0800 Subject: [PATCH 125/202] cleanup --- backend/engine/engine.py | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 
2dd08a1c..f71fbfaa 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1330,9 +1330,7 @@ async def model_engine(body: PlanTriggerRequest): ) scenario_id = engine_scenario.id - # property_valuation_increases = [] new_epc_bands = {} - # property_value_increase_ranges = {} for i in range(0, len(input_properties), BATCH_SIZE): try: # Take a slice of the input_properties list to make a batch @@ -1351,7 +1349,6 @@ async def model_engine(body: PlanTriggerRequest): valuations = PropertyValuation.estimate( property_instance=p, target_epc=new_epc, total_cost=total_cost ) - # property_value_increase_ranges[p.id] = valuations property_plan_data = db_funcs.recommendations_functions.prepare_plan_data( p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, @@ -1386,47 +1383,13 @@ async def model_engine(body: PlanTriggerRequest): session, p, new_plan_id, recommendations_to_upload ) - # if valuations["current_value"] > 0: - # property_valuation_increases.append( - # valuations["average_increased_value"] - valuations["current_value"] - # ) - except Exception as e: # Rollback the session if an error occurs logger.warning("Failed i = %s" % str(i)) logger.error(f"An error occurred during batch starting at index {i}: {e}") logger.error(f"property is uprn {p.uprn} id {p.id} address {p.address}") - logger.info("Creating portfolio aggregations") - # We implement this in the simplest way possible which will be just to query the database for all - # recommendations associated to the portfolio and then aggregate them. 
This is not the most efficient - # way to do this, but it's the simplest and will be a process that we can re-use since when we change a - # recommendation from being default to not default, we'll need to re-run this process to re-calculate the - # the portfolion level impact - - # total_valuation_increase = sum(property_valuation_increases) - # labour_days = round(max( - # [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()] - # )) - # - # # TODO - This code only pulls in the properties that have been updated in this run, but we need to - # # aggregate all properties in the portfolio. We likely need to trigger a re-aggregation - # aggregated_data = extract_portfolio_aggregation_data( - # input_properties=input_properties, - # total_valuation_increase=total_valuation_increase, - # recommendations=recommendations, - # new_epc_bands=new_epc_bands, - # property_value_increase_ranges=property_value_increase_ranges - # ) - # with db_session() as session: - # db_funcs.portfolio_functions.aggregate_portfolio_recommendations( - # session, - # portfolio_id=body.portfolio_id, - # scenario_id=scenario_id, - # total_valuation_increase=total_valuation_increase, - # labour_days=labour_days, - # aggregated_data=aggregated_data - # ) + logger.info("Work completed, updating log status") except IntegrityError as e: return handle_error("Database integrity error.", e, body.subtask_id, 500, start_ms) From 8b7c6086aeb9952f7018f34333ded54ddd22f876 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 22:18:54 +0800 Subject: [PATCH 126/202] minor logging improvement --- backend/engine/engine.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f71fbfaa..43b18d1b 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -567,7 +567,6 @@ def db_read_session(): async def model_engine(body: PlanTriggerRequest): 
logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) - logger.info("Connecting to db") created_at = datetime.now().isoformat() start_ms = int(time.time() * 1000) @@ -1330,8 +1329,9 @@ async def model_engine(body: PlanTriggerRequest): ) scenario_id = engine_scenario.id - new_epc_bands = {} - for i in range(0, len(input_properties), BATCH_SIZE): + for i in tqdm( + range(0, len(input_properties), BATCH_SIZE), total=int(np.ceil(len(input_properties) / BATCH_SIZE)) + ): try: # Take a slice of the input_properties list to make a batch batch_properties = input_properties[i:i + BATCH_SIZE] @@ -1342,7 +1342,6 @@ async def model_engine(body: PlanTriggerRequest): total_sap_points = sum([r["sap_points"] for r in default_recommendations]) new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points new_epc = sap_to_epc(new_sap_points) - new_epc_bands[p.id] = new_epc total_cost = sum([r["total"] for r in default_recommendations]) From f5fa97e1798c774f2491d1acab7459d23032e150 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 23:14:11 +0800 Subject: [PATCH 127/202] attempting to debug missing walls description --- backend/Property.py | 5 +- etl/epc/Dataset.py | 66 ++++++++++--------------- recommendations/WallRecommendations.py | 5 +- recommendations/recommendation_utils.py | 5 ++ 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index cee82836..8f9637a9 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -611,7 +611,10 @@ class Property: if self.data[description] in self.DATA_ANOMALY_MATCHES: template = cleaned[description][0] - fill_dict = dict(zip(template.keys(), [None] * len(template))) + # Handling edge case for walls + fill_with = False if description == "walls-description" else None + fill_dict = dict(zip(template.keys(), [fill_with] * len(template))) + fill_dict.update( { "original_description": self.data[description], diff --git 
a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 74dcfc56..01391874 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -498,56 +498,43 @@ class TrainingDataset(BaseDataset): Drop properties that have inconsistent data, i.e. changing material types """ + starting_and_finishing_null = ( + expanded_df["original_description"].isin([None, ""]) & + expanded_df["original_description_ending"].isin([None, ""]) + ) + if component == "walls": expanded_df = expanded_df[ - (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"]) - & ( - expanded_df["is_solid_brick"] - == expanded_df["is_solid_brick_ending"] - ) - & ( - expanded_df["is_timber_frame"] - == expanded_df["is_timber_frame_ending"] - ) - & ( - expanded_df["is_granite_or_whinstone"] - == expanded_df["is_granite_or_whinstone_ending"] - ) - & (expanded_df["is_cob"] == expanded_df["is_cob_ending"]) - & ( - expanded_df["is_sandstone_or_limestone"] - == expanded_df["is_sandstone_or_limestone_ending"] + starting_and_finishing_null | ( + (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"]) + & (expanded_df["is_solid_brick"] == expanded_df["is_solid_brick_ending"]) + & (expanded_df["is_timber_frame"] == expanded_df["is_timber_frame_ending"]) + & (expanded_df["is_granite_or_whinstone"] == expanded_df["is_granite_or_whinstone_ending"]) + & (expanded_df["is_cob"] == expanded_df["is_cob_ending"]) + & (expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"]) ) ] elif component == "floor": expanded_df = expanded_df[ - (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) - & (expanded_df["is_solid"] == expanded_df["is_solid_ending"]) - & ( - expanded_df["another_property_below"] - == expanded_df["another_property_below_ending"] - ) - & ( - expanded_df["is_to_unheated_space"] - == expanded_df["is_to_unheated_space_ending"] - ) - & ( - expanded_df["is_to_external_air"] - == expanded_df["is_to_external_air_ending"] + starting_and_finishing_null 
| ( + (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) + & (expanded_df["is_solid"] == expanded_df["is_solid_ending"]) + & (expanded_df["another_property_below"] == expanded_df["another_property_below_ending"]) + & (expanded_df["is_to_unheated_space"] == expanded_df["is_to_unheated_space_ending"]) + & (expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"]) ) ] elif component == "roof": expanded_df = expanded_df[ - (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) - & (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"]) - & (expanded_df["is_loft"] == expanded_df["is_loft_ending"]) - & (expanded_df["is_flat"] == expanded_df["is_flat_ending"]) - & (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"]) - & (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"]) - & ( - expanded_df["has_dwelling_above"] - == expanded_df["has_dwelling_above_ending"] + starting_and_finishing_null | ( + (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) + & (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"]) + & (expanded_df["is_loft"] == expanded_df["is_loft_ending"]) + & (expanded_df["is_flat"] == expanded_df["is_flat_ending"]) + & (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"]) + & (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"]) + & (expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"]) ) ] @@ -677,7 +664,6 @@ class TrainingDataset(BaseDataset): } for component in components_to_expand: - # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel": cleaned_key = "main-fuel" left_on_starting = "main_fuel_starting" diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 53a74534..c54c00d9 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -163,9 +163,8 @@ class 
WallRecommendations(Definitions): if ( (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"] - ) and ( - "cavity_extract_and_refill" - not in measures + or self.property.walls["clean_description"] is None + ) and ("cavity_extract_and_refill" not in measures ): return diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index adbeecf5..0794013e 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -201,6 +201,11 @@ def get_wall_u_value( ) ) else: + + # Handle rare edge case + if clean_description == "": + return 0 + mapped_description = epc_wall_description_map[clean_description] mapped_value = wall_uvalues_df[ From 35085d25d3a9ececd651d7df48b836dc68e1c258 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Dec 2025 23:36:52 +0800 Subject: [PATCH 128/202] relaxing connections and temp connection for lamnda --- backend/app/db/connection.py | 4 +-- backend/app/plan/router.py | 59 ++++++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index bff63ae1..a0bbe238 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -18,8 +18,8 @@ db_string = connection_string.format( # each lambda doesn't hog all connections db_engine = create_engine( db_string, - pool_size=1, - max_overflow=0, # Limit the number of extra connections. With this and pool size, we allow 1 connection per lambda + pool_size=3, + max_overflow=5, # Limit the number of extra connections. 
With this and pool size, we allow 1 connection per lambda pool_pre_ping=True, pool_recycle=300, # Forces SQLAlchemy to close and reopen any connection older than 300 seconds ) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e6e6052f..5de6b74e 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -2,7 +2,8 @@ import boto3 import json import math import asyncio -import random +from contextlib import contextmanager +from sqlmodel import Session from datetime import datetime @@ -29,6 +30,19 @@ router = APIRouter( sqs_client = boto3.client("sqs") +@contextmanager +def db_session(): + session = Session(db_engine) + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + @router.post("/trigger", status_code=202) async def trigger_plan_entrypoint(body: PlanTriggerRequest): """ @@ -57,28 +71,27 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): scenario_id = data.get("scenario_id") if not scenario_id: created_at = datetime.now().isoformat() - session = sessionmaker(bind=db_engine)() - - # Create a new scenario - new_scenario = create_scenario( - session=session, - scenario={ - "name": body.scenario_name, - "created_at": created_at, - "budget": body.budget, - "portfolio_id": body.portfolio_id, - "housing_type": body.housing_type, - "goal": body.goal, - "goal_value": body.goal_value, - "trigger_file_path": body.trigger_file_path, - "already_installed_file_path": body.already_installed_file_path, - "patches_file_path": body.patches_file_path, - "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, - "exclusions": body.exclusions, - "multi_plan": body.multi_plan - } - ) - scenario_id = new_scenario.id + with db_session() as session: + # Create a new scenario + new_scenario = create_scenario( + session=session, + scenario={ + "name": body.scenario_name, + "created_at": created_at, + "budget": body.budget, + 
"portfolio_id": body.portfolio_id, + "housing_type": body.housing_type, + "goal": body.goal, + "goal_value": body.goal_value, + "trigger_file_path": body.trigger_file_path, + "already_installed_file_path": body.already_installed_file_path, + "patches_file_path": body.patches_file_path, + "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, + "exclusions": body.exclusions, + "multi_plan": body.multi_plan + } + ) + scenario_id = new_scenario.id # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id From 14c292a3ec2bc9fd19d39505ca510f2f7e773db5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Dec 2025 09:30:01 +0800 Subject: [PATCH 129/202] handling coal community heating fuel --- backend/Property.py | 5 ++--- backend/engine/engine.py | 1 - .../d_restart_failed_subtasks.py | 9 +++++++++ 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py diff --git a/backend/Property.py b/backend/Property.py index 8f9637a9..f8013fb5 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -22,7 +22,6 @@ from recommendations.recommendation_utils import ( ) from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.app.utils import sap_to_epc -from backend.Funding import Funding import backend.app.assumptions as assumptions ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") @@ -340,7 +339,6 @@ class Property: recommendation_record=recommendation_record, recommendations=property_representative_recommendations, primary_recommendation_id=self.id, - non_invasive_recommendations=self.non_invasive_recommendations, ) return scoring_dict @@ -1243,7 +1241,8 @@ class Property: "heat network": "Natural Gas (Community Scheme)", "lpg": 'LPG', "biodiesel": "Smokeless Fuel", - "b30d": "B30K Biofuel" + "b30d": "B30K Biofuel", + "coal": "Coal", } self.heating_energy_source = list({ diff --git 
a/backend/engine/engine.py b/backend/engine/engine.py index 43b18d1b..0c6ed1de 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -15,7 +15,6 @@ from sqlmodel import Session from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.orm import sessionmaker from starlette.responses import Response from backend.app.BatterySapScorer import BatterySAPScorer diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py new file mode 100644 index 00000000..79a8bdc2 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -0,0 +1,9 @@ +""" +For the Peabody project, there were a number of subtasks that failed due to issues, with the most +prominent being errors with the property address and ID data. + +This script will fetch those failed subtasks, get the associated properties and delete the properties +from the database so that the subtasks can be restarted cleanly. 
+ +Additionally, we wil find the problematic records and remove them +""" From 1ae3aa5d6b9af3954bc4d8f9bff762295ed4c468 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Dec 2025 11:06:00 +0800 Subject: [PATCH 130/202] upgrade storage type of rds to gp3 --- .../d_restart_failed_subtasks.py | 3 +++ etl/epc/Dataset.py | 8 ++++++-- infrastructure/terraform/main.tf | 4 ++++ recommendations/Recommendations.py | 7 +++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 79a8bdc2..5ef901b2 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -6,4 +6,7 @@ This script will fetch those failed subtasks, get the associated properties and from the database so that the subtasks can be restarted cleanly. 
Additionally, we wil find the problematic records and remove them + +Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan +or recommendations in case something went wrong """ diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 01391874..7c27de51 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -697,8 +697,12 @@ class TrainingDataset(BaseDataset): for x in missing_descriptions: desc_cleaner = cleaner(x) cleaned = desc_cleaner.process() - # IF NODATA, REMAP TO NONE VALUES - if all((pd.DataFrame(cleaned, index=[0]).T)[0] == False): + # IF NODATA, REMAP TO NONE VALUES, apart from walls which we want to keep as is + # If we convert the walls data to None, we end up converting booleans to None which + # causes issues downstream + if all( + (pd.DataFrame(cleaned, index=[0]).T)[0] == False + ) and component != "walls": cleaned = {key: None for key in cleaned.keys()} cleaned_data.append( { diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index f345c985..a26a1af4 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -85,6 +85,10 @@ resource "aws_db_instance" "default" { ca_cert_identifier = "rds-ca-rsa2048-g1" # Temporary to enfore immediate change apply_immediately = true + # Set up storage type to gp3 for better performance + storage_type = "gp3" + iops = 3000 + storage_throughput = 125 } # Set up the bucket that recieve the csv uploads of epc to be retrofit diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 5d97a554..f984acc3 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -698,6 +698,13 @@ class Recommendations: "heating_cop": 0.85, "hotwater_cop": 0.85 } + if main_fuel_description in ['coal (community)']: + return { + "heating_fuel_type": "Coal", + "hotwater_fuel_type": "Coal", + "heating_cop": 0.85, + "hotwater_cop": 0.85 + } logger.warning( 
"Unhandled community fuel." f"Fuel: {main_fuel_description}" From e4436b45986311c266e26be75e8ff0d8864ccb22 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Dec 2025 11:21:18 +0800 Subject: [PATCH 131/202] remove invalid rds params --- infrastructure/terraform/main.tf | 2 -- 1 file changed, 2 deletions(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index a26a1af4..c3a585f7 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -87,8 +87,6 @@ resource "aws_db_instance" "default" { apply_immediately = true # Set up storage type to gp3 for better performance storage_type = "gp3" - iops = 3000 - storage_throughput = 125 } # Set up the bucket that recieve the csv uploads of epc to be retrofit From d7b980309067b75bdea7875db9608bc0cb594eb2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Dec 2025 08:41:59 +0800 Subject: [PATCH 132/202] refactoring database fetches --- asset_list/AssetList.py | 10 +- asset_list/app.py | 33 +++++ asset_list/mappings/built_form.py | 56 ++++++++- asset_list/mappings/property_type.py | 67 +++++++++- backend/addresses/Address.py | 65 ++++++++++ backend/addresses/Addresses.py | 84 +++++++++++++ backend/app/db/functions/address_functions.py | 99 ++++++++++----- .../functions/energy_assessment_functions.py | 52 +++++--- backend/app/db/functions/epc_functions.py | 76 ++++++++++- .../app/db/functions/property_functions.py | 84 ++++++++++++- backend/engine/engine.py | 118 +++++++++++------- 11 files changed, 646 insertions(+), 98 deletions(-) create mode 100644 backend/addresses/Address.py create mode 100644 backend/addresses/Addresses.py diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index da20432b..940c723a 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -997,7 +997,15 @@ class AssetList: # Keep a record of duplicates self.duplicated_addresses = self.standardised_asset_list[ 
self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() - ][[self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]].copy() + ][[self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, self.postcode_colname]].copy() + + df = self.standardised_asset_list[ + self.standardised_asset_list[self.DOMNA_PROPERTY_ID].isin( + self.duplicated_addresses[self.DOMNA_PROPERTY_ID]) + ][[self.landlord_property_id, self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, + self.postcode_colname]].copy() + + df = df.sort_values(by=[self.DOMNA_PROPERTY_ID]) self.standardised_asset_list = self.standardised_asset_list[ ~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() diff --git a/asset_list/app.py b/asset_list/app.py index 3d8a0fae..c58eccd7 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,39 @@ def app(): Property UPRN """ + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Warmfront/SCIS") + data_filename = "SCIS_Historic_Deemed_Combined_Workings.xlsx" + sheet_name = "SCIS" + postcode_column = 'POSTCODE' + address1_column = "NO" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["NO", "Street / Block Name", "Town/Area"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "PROPERTY TYPE As per table emailed" + landlord_built_form = "PROPERTY TYPE As per table emailed" + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Row ID" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference 
= None + # Peabody data for cleaning data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " "Project/data_validation") diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 58686d6b..a9defdef 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -464,6 +464,60 @@ BUILT_FORM_MAPPINGS = { 'EnclosedEndTerrace': 'enclosed end-terrace', 'EndTerrace': 'end-terrace', 'SemiDetached': 'semi-detached', - 'MidTerrace': 'mid-terrace' + 'MidTerrace': 'mid-terrace', + + '1st FLOOR FLAT': 'mid-floor', + 'END TERRACE HOUSE': 'end-terrace', + 'BUNGALOW-END TERRACE': 'end-terrace', + 'BUNGALOW END TERRACE': 'end-terrace', + 'END-TERRACE': 'end-terrace', + 'SEMI DETACHED': 'semi-detached', + 'Mid flat Ground Floor': 'ground floor', + 'MID TERRACED': 'mid-terrace', + 'Mid Terrace bungalow': 'mid-terrace', + 'BUNGLAOW SEMI DETACHED': 'detached', + 'Bungalow ENd Terrace': 'end-terrace', + 'Bungalow Semi detached': 'detached', + 'BUNGALOW - SEMI DETACHED': 'detached', + 'Bungalow mid terrace': 'mid-terrace', + 'BUNGALOW - MID TERRACED': 'mid-terrace', + 'BUNGALOW - MID TERRACE': 'mid-terrace', + 'Bungalow end terrace': 'end-terrace', + 'BUNGALOW SEMI-DETACHED': 'detached', + 'MID TERR': 'mid-terrace', + 'Bungalow - mid terrace': 'mid-terrace', + 'MID-TERRACE': 'mid-terrace', + 'Bunagalow Semi Detached': 'semi-detached', + 'SEMI DETACHED BUNGALOW': 'semi-detached', + 'MID TERRACE HOUSE': 'mid-terrace', + 'END - TERRACE': 'end-terrace', + 'BUNGALOW-SEMI DETACHED': 'semi-detached', + 'Semi-Detached': 'semi-detached', + 'End-Terrace house': 'end-terrace', + 'BUNGALOW MID TERRACE': 'mid-terrace', + 'SEMI DETACHED HOUSE': 'semi-detached', + 'BUNGALOW SEMI DETACHED': 'detached', + 'MID - TERRACE': 'mid-terrace', + '3 EXT WALL FLAT': 'end-terrace', + '3 Ext wall flat': 'end-terrace', + '3 EX WALL FLAT': 'end-terrace', + '2 ext wall flats': 'mid-terrace', + '2 EXT WALLS': 
'mid-terrace', + '3.EXT.WALL FLAT': 'end-terrace', + 'FLAT 3 WALLS': 'end-terrace', + '2 Ext Wall flat': 'mid-terrace', + 'DETATCHED HOUSE': 'detached', + '3 EXT. WALL FLAT': 'end-terrace', + '3 ext wall flat': 'end-terrace', + '3 EXT WALLS': 'end-terrace', + '3 EXT WALL - NOW 2 EXT': 'unknown', + '3 EXT-WALL FLAT': 'end-terrace', + 'FLAT 2 WALLS': 'mid-terrace', + '3 EX WALL MAISONETTE': 'end-terrace', + '3 Ext Wall Flat': 'end-terrace', + 'Semi Bungalow': 'semi-detached', + '2 EXT WALL FLAT': 'mid-terrace', + '2.EXT.WALL FLAT': 'mid-terrace', + '2 EXT. WALL FLAT': 'mid-terrace', } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 1c236d96..1f251598 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -362,6 +362,71 @@ PROPERTY_MAPPING = { 'Maisonette: Semi Detached: Mid Floor': 'maisonette', 'Maisonette: Detached: Mid Floor': 'maisonette', - 'House: EnclosedMidTerrace': 'house' + 'House: EnclosedMidTerrace': 'house', + + '3 EXT WALL FLAT': 'flat', + '1st FLOOR FLAT': 'flat', + '3 Ext wall flat': 'flat', + '3 EX WALL FLAT': 'flat', + 'END TERRACE HOUSE': 'house', + 'BUNGALOW-END TERRACE': 'bungalow', + 'BUNGALOW END TERRACE': 'bungalow', + '2 ext wall flats': 'flat', + 'Mid flat Ground Floor': 'flat', + '3.EXT.WALL FLAT': 'flat', + 'FLAT 3 WALLS': 'flat', + 'Mid Terrace bungalow': 'bungalow', + 'Bungalow ENd Terrace': 'bungalow', + '2 Ext Wall flat': 'flat', + 'DETATCHED HOUSE': 'house', + 'Bungalow Semi detached': 'bungalow', + 'BUNGALOW - SEMI DETACHED': 'bungalow', + 'Bungalow mid terrace': 'bungalow', + 'BUNGALOW - MID TERRACED': 'bungalow', + 'BUNGALOW - MID TERRACE': 'bungalow', + 'Bungalow end terrace': 'bungalow', + '3 EXT. 
WALL FLAT': 'flat', + '3 ext wall flat': 'flat', + 'BUNGALOW SEMI-DETACHED': 'bungalow', + '3 EXT-WALL FLAT': 'flat', + 'Bungalow - mid terrace': 'bungalow', + 'SEMI DETACHED BUNGALOW': 'bungalow', + 'FLAT 2 WALLS': 'flat', + 'MID TERRACE HOUSE': 'house', + '3 EX WALL MAISONETTE': 'maisonette', + 'BUNGALOW-SEMI DETACHED': 'bungalow', + '3 Ext Wall Flat': 'flat', + 'Semi Bungalow': 'bungalow', + 'End-Terrace house': 'house', + 'BUNGALOW MID TERRACE': 'bungalow', + 'Mid-terrace house': 'house', + 'SEMI DETACHED HOUSE': 'house', + 'Semi-detached house': 'house', + '2 EXT WALL FLAT': 'flat', + '2.EXT.WALL FLAT': 'flat', + 'BUNGALOW SEMI DETACHED': 'bungalow', + '2 EXT. WALL FLAT': 'flat', + 'END-TERRACE': 'unknown', + 'SEMI DETACHED': 'unknown', + '2 EXT WALLS': 'unknown', + 'MID TERRACED': 'unknown', + 'BUNGLAOW SEMI DETACHED': 'bungalow', + 'END TERRACE': 'unknown', + '3 EXT WALLS': 'unknown', + 'Mid Terrace': 'unknown', + '3 EXT WALL - NOW 2 EXT': 'unknown', + 'MID TERR': 'unknown', + 'DETACHED': 'unknown', + 'MID-TERRACE': 'unknown', + 'Bunagalow Semi Detached': 'bungalow', + 'End-terrace': 'unknown', + 'END - TERRACE': 'unknown', + 'SEMI-DETACHED': 'unknown', + 'Semi-Detached': 'unknown', + 'MID TERRACE': 'unknown', + 'End Terrace': 'unknown', + 'Detached': 'unknown', + 'Mid-terrace': 'unknown', + 'MID - TERRACE': 'unknown' } diff --git a/backend/addresses/Address.py b/backend/addresses/Address.py new file mode 100644 index 00000000..d6a00407 --- /dev/null +++ b/backend/addresses/Address.py @@ -0,0 +1,65 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass(slots=True) +class Address: + uprn: Optional[int] + landlord_property_id: Optional[str] + address: Optional[str] + full_address: Optional[str] + postcode: str + property_type: Optional[str] + built_form: Optional[str] + estimated: bool + + # Additional address data, associated to a standardised asset list + domna_full_address: Optional[str] + domna_address_1: Optional[str] + 
landlord_heating_system: Optional[str] = None + + @property + def address1(self): + + if self.domna_address_1 is not None: + address1 = self.domna_address_1 + else: + address1 = self.address + + # Format + address1 = str(int(address1)) if isinstance(address1, float) else str(address1) + return address1 + + @property + def request_data(self) -> dict[str, Optional[str]]: + """ + Canonical request payload for downstream services. + """ + data = { + "uprn": self.uprn, + "landlord_property_id": self.landlord_property_id, + "postcode": self.postcode, + "address1": self.address1, + "full_address": self.full_address, + } + + # Drop nulls + return {k: v for k, v in data.items() if v is not None} + + @property + def heating_system(self): + """ + Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited, + placeholder function to cover some initial immediate cases. + :return: + """ + + ll_heating = self.landlord_property_id + if not ll_heating: + return None + + if ll_heating == "electric storage heaters": + # Return with the same format at the EPC + return "Electric storage heaters" + + return None diff --git a/backend/addresses/Addresses.py b/backend/addresses/Addresses.py new file mode 100644 index 00000000..22822c6b --- /dev/null +++ b/backend/addresses/Addresses.py @@ -0,0 +1,84 @@ +from backend.addresses.Address import Address + + +class Addresses: + def __init__(self, addresses: list[Address]): + self._addresses = addresses + # self._identity_index = self._build_identity_index() + + def __getitem__(self, index: int) -> Address: + return self._addresses[index] + + def __len__(self) -> int: + return len(self._addresses) + + @classmethod + def from_plan_input(cls, plan_input: list[dict], body) -> "Addresses": + addresses = [] + for row in plan_input: + addresses.append(cls._parse_row(row, body)) + return cls(addresses) + + def get_uprns(self): + return [x.uprn for x in self._addresses if x.uprn is not None] + + def 
get_landlord_ids(self): + return [x.landlord_property_id for x in self._addresses if x.landlord_property_id is not None] + + def get_unique_postcodes(self): + return list({x.postcode for x in self._addresses}) + + def get_postcodes_for_flats(self): + # Method to extract all of the postcodes associated to a flat, which is used for remote assessments + # on flats + return [x.postcode for x in self._addresses if x.property_type in ["Flat", "flat"]] + + def get_property_requests(self): + return [x.request_data for x in self._addresses] + + @staticmethod + def _parse_row(row: dict, body) -> Address: + def clean_uprn(v): + try: + return int(float(v)) + except (TypeError, ValueError): + return None + + uprn = clean_uprn(row.get("uprn")) + + address = row.get("address") + if not address and body.file_format == "domna_asset_list": + address = row.get("domna_address_1") + + full_address = ( + row.get("domna_full_address") + if body.file_format == "domna_asset_list" + else None + ) + if not isinstance(full_address, str): + full_address = None + + postcode = str(row["postcode"]).strip().upper() + + return Address( + uprn=uprn, + landlord_property_id=str(row["landlord_property_id"]) + if row.get("landlord_property_id") else None, + address=str(address).strip() if address else None, + full_address=str(full_address).strip() if full_address else None, + postcode=postcode, + property_type=row.get("property_type"), + built_form=row.get("built_form"), + estimated=bool(row.get("estimated", False)), + domna_full_address=row.get("domna_full_address"), + domna_address_1=row.get("domna_address_1"), + ) + + # def _build_identity_index(self) -> dict: + # index = {} + # for addr in self._addresses: + # key = addr.identity_key() + # if key in index: + # raise ValueError(f"Duplicate address identity detected: {key}") + # index[key] = addr + # return index diff --git a/backend/app/db/functions/address_functions.py b/backend/app/db/functions/address_functions.py index b04f14c9..3074b02a 100644 
--- a/backend/app/db/functions/address_functions.py +++ b/backend/app/db/functions/address_functions.py @@ -20,7 +20,7 @@ def _get_associated_records(results, uprn, uprn_key="UPRN"): return matched_record -def get_associated_uprns(session: Session, postcode: str, uprn: str): +def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str): """ Given a postcode and UPRN, for a remote assessment, fetch all associated UPRNs, based on parent UPRN. This will be properties in the same building @@ -28,40 +28,83 @@ def get_associated_uprns(session: Session, postcode: str, uprn: str): Parent UPRN is referenced in the following docs: https://static.geoplace.co.uk/downloads/GeoPlace-Data-Entry-Conventions-Best-Practice-for-Addresses.pdf - :param session: The database session - :param postcode: The postcode string to search for + :param PostcodeSearch postcode_search: The postcode search record :param uprn: The UPRN string to match :return: The matching PostcodeSearch record, or None if not found """ - try: - record = ( - session.query(PostcodeSearch) - .filter(func.upper(PostcodeSearch.postcode) == postcode) - .first() - ) - if not record: - # No record found for this postcode - return [] + if not postcode_search: + return [] - matched_record = _get_associated_records(results=record.result_data["results"], uprn=uprn) + matched_record = _get_associated_records(results=postcode_search.result_data["results"], uprn=uprn) - if len(matched_record) != 1: - logger.error("Something went wrong, about to return nothing") - return [] + if len(matched_record) != 1: + return [] - if not matched_record[0].get("PARENT_UPRN"): - logger.info("No parent UPRN found, cannot get associated records") - return [] + if not matched_record[0].get("PARENT_UPRN"): + logger.info("No parent UPRN found, cannot get associated records") + return [] - associated_records = _get_associated_records( - results=record.result_data["results"], uprn=matched_record[0]["PARENT_UPRN"], uprn_key="PARENT_UPRN" - ) - # 
We now fetch all UPRNS with the same parent UPRN - associated_uprns = [int(x["UPRN"]) for x in associated_records if x["UPRN"] != str(uprn)] + associated_records = _get_associated_records( + results=postcode_search.result_data["results"], uprn=matched_record[0]["PARENT_UPRN"], uprn_key="PARENT_UPRN" + ) + # We now fetch all UPRNS with the same parent UPRN + associated_uprns = [int(x["UPRN"]) for x in associated_records if x["UPRN"] != str(uprn)] - return associated_uprns + return associated_uprns - except SQLAlchemyError as e: - session.rollback() - raise e + +def get_by_postcodes(session: Session, postcodes: list[str]) -> dict[str, PostcodeSearch]: + """ + Given a list of postcodes, retrieves postcode data from the database form the PostcodeSearch table + :param session: + :param postcodes: + :return: + """ + if not postcodes: + return {} + + normalised = {p.upper() for p in postcodes if p} + + records = ( + session.query(PostcodeSearch) + .filter(func.upper(PostcodeSearch.postcode).in_(normalised)) + .all() + ) + + return {r.postcode.upper(): r for r in records} + + +def get_associated_uprns_from_record(record: PostcodeSearch, uprn: str) -> list[int]: + """ + Given the postcode sra + :param record: + :param uprn: + :return: + """ + if not record: + return [] + + matched_record = _get_associated_records( + results=record.result_data["results"], + uprn=uprn + ) + + if len(matched_record) != 1: + return [] + + parent_uprn = matched_record[0].get("PARENT_UPRN") + if not parent_uprn: + return [] + + associated_records = _get_associated_records( + results=record.result_data["results"], + uprn=parent_uprn, + uprn_key="PARENT_UPRN" + ) + + return [ + int(x["UPRN"]) + for x in associated_records + if x["UPRN"] != str(uprn) + ] diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py index bbdaaac7..c9e40b3f 100644 --- a/backend/app/db/functions/energy_assessment_functions.py +++ 
b/backend/app/db/functions/energy_assessment_functions.py @@ -1,3 +1,4 @@ +from typing import Iterable from backend.app.db.models.energy_assessments import ( EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum ) @@ -63,27 +64,48 @@ def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> D return uprn_to_assessment_id -def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]: +def get_latest_assessments_for_uprns( + session: Session, + uprns: Iterable[int], +) -> dict[int, dict]: """ - Retrieve the latest energy assessment for a given UPRN based on the inspection date. + Fetch the latest energy assessment per UPRN in a single query. - :param session: The database session - :param uprn: The unique property reference number - :return: The latest EnergyAssessment object or None if not found + Returns a dict: + uprn -> assessment_dict | empty_response """ - if not uprn: - return EnergyAssessment.empty_response() + uprns = [u for u in uprns if u] + if not uprns: + return {} - try: - # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order - latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by( - desc(EnergyAssessment.inspection_date)).first() + # DISTINCT ON requires matching ORDER BY + records = ( + session.query(EnergyAssessment) + .filter(EnergyAssessment.uprn.in_(uprns)) + .order_by( + EnergyAssessment.uprn, + desc(EnergyAssessment.inspection_date), + ) + .distinct(EnergyAssessment.uprn) + .all() + ) - return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response() - except Exception as e: - logger.info(f"An error occurred: {e}") - return None + result: dict[int, dict] = {} + + for record in records: + result[record.uprn] = record.to_dict() + + # Fill missing uprns with empty response + uprn_set = set(uprns) + found_set = set(result.keys()) + + missing_uprns = uprn_set - 
found_set + + for uprn in missing_uprns: + result[uprn] = EnergyAssessment.empty_response() + + return result def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict): diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py index 4b675f1f..27fb9da4 100644 --- a/backend/app/db/functions/epc_functions.py +++ b/backend/app/db/functions/epc_functions.py @@ -1,3 +1,4 @@ +from typing import List from datetime import datetime, timedelta, timezone from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError @@ -50,6 +51,77 @@ class EpcStoreService: "epc_page_created_at": record.epc_page_created_at, } + @classmethod + def get_epcs_for_uprns(cls, session: Session, uprns: List[int]) -> dict[int, dict]: + """ + Given a list of uprns, return a dict mapping each uprn to its EPC data status and content. + :param session: + :param uprns: + :return: + """ + if not uprns: + return {} + + cutoff = datetime.now(timezone.utc) - timedelta(days=cls.FRESHNESS_DAYS) + + records = ( + session.query(EpcStore) + .filter(EpcStore.uprn.in_(uprns)) + .all() + ) + + result: dict[int, dict] = {} + + for record in records: + if not record.epc_api_created_at: + result[record.uprn] = { + "status": cls.MISSING, + "epc_api": None, + "epc_page": None, + "epc_page_rrn": None, + "epc_api_created_at": None, + "epc_page_created_at": None, + } + continue + + if record.epc_api_created_at.date() < cutoff.date(): + # We only expose epc_page when epc_api is fresh. 
+ result[record.uprn] = { + "status": cls.EXPIRED, + "epc_api": None, + "epc_page": None, + "epc_page_rrn": None, + "epc_api_created_at": None, + "epc_page_created_at": None, + } + continue + + result[record.uprn] = { + "status": cls.FRESH, + "epc_api": record.epc_api, + "epc_page": record.epc_page, + "epc_page_rrn": record.epc_page_rrn, + "epc_api_created_at": record.epc_api_created_at, + "epc_page_created_at": record.epc_page_created_at, + } + + # For the uprns not found in records, mark them as missing + requested = set(uprns) + found = set(result.keys()) + + missing = requested - found + for uprn in missing: + result[uprn] = { + "status": cls.MISSING, + "epc_api": None, + "epc_page": None, + "epc_page_rrn": None, + "epc_api_created_at": None, + "epc_page_created_at": None, + } + + return result + @classmethod def check_insert_needed(cls, epc_cache, epc_estimated, uprn): """ @@ -115,11 +187,7 @@ class EpcStoreService: ) session.add(record) - session.flush() - session.commit() - return record except SQLAlchemyError as e: - session.rollback() raise e diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py index fc49d205..32cd9a25 100644 --- a/backend/app/db/functions/property_functions.py +++ b/backend/app/db/functions/property_functions.py @@ -1,14 +1,19 @@ ### # This script contains methods for interacting with the property table in the database ### +from typing import List import datetime import pytz +from sqlalchemy import select, or_ from sqlalchemy.orm import Session +from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.dialects.postgresql import insert + +from backend.addresses.Address import Address from backend.app.db.models.portfolio import ( PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel, PropertyDetailsSpatial ) -from sqlalchemy.orm.exc import NoResultFound def create_property(session: Session, portfolio_id: int, address: str, postcode: 
str, uprn: str, @@ -203,3 +208,80 @@ def update_or_create_property_spatial_details(session: Session, uprn: int, prope session.flush() return True + + +def get_existing_properties(session, portfolio_id, uprns, landlord_ids): + """ + Bulk method for checking for existing properties + :param session: + :param portfolio_id: + :param uprns: + :param landlord_ids: + :return: + """ + return ( + session.exec( + select(PropertyModel) + .where(PropertyModel.portfolio_id == portfolio_id) + .where( + or_( + PropertyModel.uprn.in_(uprns), + PropertyModel.landlord_property_id.in_(landlord_ids), + ) + ) + ) + .scalars() + .all() + ) + + +def bulk_create_properties( + session, + body, + addresses: list[Address], # these are *new* addresses + energy_assessment_by_uprn: dict[int, dict], +): + rows = [] + + for addr in addresses: + energy_assessment = energy_assessment_by_uprn.get(addr.uprn, {}) + status = ( + PortfolioStatus.ASSESSMENT.value + if not energy_assessment.get("epc") + else PortfolioStatus.SURVEY.value + ) + + rows.append( + { + "address": addr.address1, + "postcode": addr.postcode, + "portfolio_id": body.portfolio_id, + "uprn": addr.uprn, + "landlord_property_id": addr.landlord_property_id, + "creation_status": PropertyCreationStatus.LOADING, + "status": status, + "has_pre_condition_report": False, + "has_recommendations": False, + } + ) + + if not rows: + return [] + + stmt = ( + insert(PropertyModel) + .values(rows) + .on_conflict_do_nothing( + index_elements=["portfolio_id", "uprn"] + ) + .returning( + PropertyModel.id, + PropertyModel.uprn, + PropertyModel.landlord_property_id, + ) + ) + + result = session.execute(stmt) + session.flush() + + return result.fetchall() diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 0c6ed1de..46490289 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -34,6 +34,7 @@ import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi from backend.Property import Property 
from backend.apis.GoogleSolarApi import GoogleSolarApi +from backend.addresses.Addresses import Addresses from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser @@ -368,24 +369,6 @@ def get_funding_data(): return project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes -def parse_heating_system(config): - """ - Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited, - placeholder function to cover some initial immediate cases. - :return: - """ - - ll_heating = config.get("landlord_heating_system", None) - if not ll_heating: - return None - - if ll_heating == "electric storage heaters": - # Return with the same format at the EPC - return "Electric storage heaters" - - return None - - def check_duplicate_uprns(plan_input): """ Simple function to check if the input data contains duplicated UPRNS. @@ -682,38 +665,81 @@ async def model_engine(body: PlanTriggerRequest): bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) - input_properties, inspections_map, eco_packages = [], {}, {} - for config in tqdm(plan_input): + # Prepare input data + addresses = Addresses.from_plan_input(plan_input, body) - uprn, address1, full_address = extract_address_data(config, body) - heating_system = parse_heating_system(config) + uprns = addresses.get_uprns() + landlord_ids = addresses.get_landlord_ids() + postcodes = addresses.get_postcodes_for_flats() - # ---------- 1) fetch data ---------- - epc_api_data, epc_page, rrn, epc_cache = None, None, None, {} - with db_read_session() as session: - epc_cache = {} - if uprn: - epc_cache = db_funcs.epc_functions.EpcStoreService.get_epc_for_uprn(session, uprn) + with db_read_session() as session: + existing_properties = db_funcs.property_functions.get_existing_properties( + session, body.portfolio_id, uprns, landlord_ids + ) + property_lookup = {} + for prop in 
existing_properties: + if prop.uprn: + property_lookup[("uprn", prop.uprn)] = prop.id + if prop.landlord_property_id: + property_lookup[("landlord_property_id", prop.landlord_property_id)] = prop.id - # For remote assessments of flats, we get associated UPRNs - associated_uprns = [] - if body.event_type == "remote_assessment" and config.get("property_type") == "Flat": - associated_uprns = db_funcs.address_functions.get_associated_uprns( - session, postcode=config["postcode"], uprn=uprn - ) + # List of properties that need to be created in the db + to_create = [] + for addr in addresses: + key = ("uprn", addr.uprn) if addr.uprn else ("landlord_property_id", addr.landlord_property_id) + if key not in property_lookup: + to_create.append(addr) - # We check for an energy assessment we have performed on this property: - energy_assessment = db_funcs.energy_assessment_functions.get_latest_assessment_by_uprn( - session, uprn + # Pre-requests to the db + with db_read_session() as session: + epc_cache_by_uprn = db_funcs.epc_functions.EpcStoreService.get_epcs_for_uprns(session, uprns) + postcode_searches = db_funcs.address_functions.get_by_postcodes(session, list(postcodes)) + energy_assessments_by_uprn = db_funcs.energy_assessment_functions.get_latest_assessments_for_uprns( + session, uprns + ) + + # If we have properties that need to be created, we cerate them in bulk + if to_create: + with db_session() as session: + inserted = db_funcs.property_functions.bulk_create_properties( + session, body, to_create, energy_assessments_by_uprn ) + for prop_id, uprn, landlord_property_id in inserted: + if uprn is not None: + property_lookup[("uprn", uprn)] = prop_id + if landlord_property_id: + property_lookup[("landlord_property_id", landlord_property_id)] = prop_id + + # We append the newly created properties to property_lookup + + input_properties, inspections_map, eco_packages = [], {}, {} + for addr in tqdm(addresses): + + # Identity data + uprn = addr.uprn + address1 = 
addr.address1 + postcode = addr.postcode + full_address = addr.full_address + heating_system = addr.heating_system + + # ---------- 1) filter fetched data ---------- + epc_cache = epc_cache_by_uprn[uprn] + epc_api_data, epc_page, rrn, = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] # Extract from EPC cache if epc_cache.get("status") == db_funcs.epc_functions.EpcStoreService.FRESH: epc_api_data, epc_page, rrn = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] + # Extract associated UPRNs from the database response + associated_uprns = db_funcs.address_functions.get_associated_uprns( + postcode_searches.get(postcode.upper()), uprn=uprn + ) + + energy_assessment = energy_assessments_by_uprn.get(uprn) + epc_searcher = SearchEpc( address1=address1, - postcode=config["postcode"], + postcode=postcode, uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", @@ -721,8 +747,8 @@ async def model_engine(body: PlanTriggerRequest): heating_system=heating_system, associated_uprns=associated_uprns ) - epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) - epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) + epc_searcher.ordnance_survey_client.built_form = addr.built_form + epc_searcher.ordnance_survey_client.property_type = addr.property_type # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) @@ -731,14 +757,15 @@ async def model_engine(body: PlanTriggerRequest): # ---------- 2) ensure property exists ---------- with db_session() as session: property_id, is_new = db_funcs.property_functions.ensure_property_exists( - session, body, epc_searcher, energy_assessment, - landlord_property_id=config.get("landlord_property_id") + session, body, epc_searcher, energy_assessment, landlord_property_id=addr.landlord_property_id ) if not property_id or (not is_new 
and not body.multi_plan): continue if is_new: + # TODO: We can probably make these queries in bulk at the front end and use a placeholder + # property ID, and then inject this information afterwards with db_session() as session: db_funcs.property_functions.create_property_targets( session, @@ -783,11 +810,7 @@ async def model_engine(body: PlanTriggerRequest): epc_records = patch_epc(patch, epc_records) - prepared_epc = EPCRecord( - epc_records=epc_records, - run_mode="newdata", - cleaning_data=cleaning_data, - ) + prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # TODO: This is a temp function to handle a specific edge case with Peabody. We should # factor this into EPCRecord as part of the cleaning however we need some more testing @@ -822,6 +845,7 @@ async def model_engine(body: PlanTriggerRequest): # 2) A real EPC # 3) A UPRN (meaning that a UPRN could be fetched against that property) # We store this data + # TODO: Upload in bulk with db_session() as session: if db_funcs.epc_functions.EpcStoreService.check_insert_needed( epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn From 9c34e202bc0797b23881e2177aa1f2d7ee982ebd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Dec 2025 09:43:13 +0800 Subject: [PATCH 133/202] refactoring db requests to run in bulk --- backend/addresses/Address.py | 2 + backend/app/db/functions/address_functions.py | 6 +- .../app/db/functions/inspections_functions.py | 2 - .../db/functions/recommendations_functions.py | 34 ++++----- backend/app/plan/utils.py | 25 ++++--- backend/engine/engine.py | 72 +++++++++---------- 6 files changed, 66 insertions(+), 75 deletions(-) diff --git a/backend/addresses/Address.py b/backend/addresses/Address.py index d6a00407..9b95f5e0 100644 --- a/backend/addresses/Address.py +++ b/backend/addresses/Address.py @@ -17,6 +17,8 @@ class Address: domna_full_address: Optional[str] domna_address_1: Optional[str] 
landlord_heating_system: Optional[str] = None + solar_reason: Optional[str] = None + cavity_reason: Optional[str] = None @property def address1(self): diff --git a/backend/app/db/functions/address_functions.py b/backend/app/db/functions/address_functions.py index 3074b02a..4b8ad5f2 100644 --- a/backend/app/db/functions/address_functions.py +++ b/backend/app/db/functions/address_functions.py @@ -20,7 +20,7 @@ def _get_associated_records(results, uprn, uprn_key="UPRN"): return matched_record -def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str): +def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str | int): """ Given a postcode and UPRN, for a remote assessment, fetch all associated UPRNs, based on parent UPRN. This will be properties in the same building @@ -36,6 +36,10 @@ def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str): if not postcode_search: return [] + if isinstance(uprn, int): + # For this, coerce to string + uprn = str(uprn) + matched_record = _get_associated_records(results=postcode_search.result_data["results"], uprn=uprn) if len(matched_record) != 1: diff --git a/backend/app/db/functions/inspections_functions.py b/backend/app/db/functions/inspections_functions.py index d66154cb..b1c1eeb5 100644 --- a/backend/app/db/functions/inspections_functions.py +++ b/backend/app/db/functions/inspections_functions.py @@ -2,7 +2,6 @@ import re from dataclasses import dataclass, asdict from typing import Optional, Dict, Any, Type, TypeVar from sqlalchemy.orm import Session -from datetime import timezone from enum import Enum from datetime import datetime, timedelta @@ -24,7 +23,6 @@ from backend.app.db.models.inspections import ( InspectionsCladding, InspectionsAccessIssues, ) -from sqlalchemy.dialects.postgresql import insert NON_INTRUSIVE_PREFIX = "non-intrusives:" diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 14596749..b70111da 100644 
--- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -96,27 +96,23 @@ def create_plan(session: Session, plan): raise e -def create_scenario(session: Session, scenario): - """ - This function will create a record for the scenario in the database if it does not exist. - :param session: The database session - :param scenario: dictionary of data representing a scenario to be created - """ - try: +def create_scenario(session: Session, scenario: dict) -> int: + existing_scenario = ( + session.query(Scenario) + .filter_by(portfolio_id=scenario["portfolio_id"]) + .first() + ) - # Before creating a new scenario, we check if there is a scenario for this portfolio id already - # If there is, it means that any new scnario created will NOT be the default scenario - existing_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first() - scenario["is_default"] = True if not existing_scenario else False + scenario["is_default"] = not bool(existing_scenario) - new_scenario = Scenario(**scenario) - session.add(new_scenario) - session.flush() - session.commit() - return new_scenario - except SQLAlchemyError as e: - session.rollback() - raise e + new_scenario = Scenario(**scenario) + session.add(new_scenario) + session.flush() # ensures ID is populated + + scenario_id = new_scenario.id + session.commit() + + return scenario_id def create_recommendation(session: Session, recommendation): diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 717638cf..52e2b0c4 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,9 +1,9 @@ +import ast import os -import time import msgpack from uuid import UUID -from typing import Any from utils.s3 import read_from_s3 +from backend.addresses.Address import Address from backend.app.config import get_settings from backend.app.plan.data_classes import PropertyRequestData from backend.app.db.functions.tasks.Tasks 
import SubTaskInterface @@ -52,21 +52,20 @@ def patch_epc(patch, epc_records): def extract_property_request_data( - config, patches, already_installed, non_invasive_recommendations, valuation_data, uprn + address: Address, patches, already_installed, non_invasive_recommendations, valuation_data, uprn ): patch_has_uprn = "uprn" in patches[0] if patches else True if patch_has_uprn: patch = next(( - x for x in patches if str(x["uprn"]) == str(config["uprn"]) + x for x in patches if str(x["uprn"]) == str(address.uprn) ), {}) else: patch = next(( - x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode) ), {}) property_already_installed = next(( - x for x in already_installed if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + x for x in already_installed if (x["address"] == address.address) and (x["postcode"] == address.postcode) ), []) # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN @@ -85,7 +84,7 @@ def extract_property_request_data( else: property_non_invasive_recommendations = next(( x for x in non_invasive_recommendations if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + (x["address"] == address.address) and (x["postcode"] == address.postcode) ), {}) if isinstance(property_non_invasive_recommendations.get("recommendations"), str): @@ -114,7 +113,7 @@ def extract_property_request_data( else: property_valuation = next(( float(x["valuation"]) for x in valuation_data if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + (x["address"] == address.address) and (x["postcode"] == address.postcode) ), None) # Return data class to give a structured format @@ -126,14 +125,14 @@ def extract_property_request_data( ) -def parse_eco_packages(config: dict[str, Any], prepared_epc) -> 
tuple[list[str], int, str, list[str]] | tuple[ +def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[ None, None, None, list]: - solar_identification = config.get("solar_reason", None) - cavity_identification = config.get("cavity_reason", None) + solar_identification = addr.solar_reason + cavity_identification = addr.cavity_reason if not solar_identification and not cavity_identification: return None, None, None, [] - landlord_heating_system = config["landlord_heating_system"] + landlord_heating_system = addr.landlord_heating_system # This is the initial version of tackling "already installed" measures already_installed = [] if landlord_heating_system == "air source heat pump": diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 46490289..c19cf28f 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -672,6 +672,7 @@ async def model_engine(body: PlanTriggerRequest): landlord_ids = addresses.get_landlord_ids() postcodes = addresses.get_postcodes_for_flats() + # Check if we've seen these properties before with db_read_session() as session: existing_properties = db_funcs.property_functions.get_existing_properties( session, body.portfolio_id, uprns, landlord_ids @@ -699,32 +700,31 @@ async def model_engine(body: PlanTriggerRequest): ) # If we have properties that need to be created, we cerate them in bulk + new_property_ids = set() if to_create: with db_session() as session: inserted = db_funcs.property_functions.bulk_create_properties( session, body, to_create, energy_assessments_by_uprn ) + for prop_id, uprn, landlord_property_id in inserted: + new_property_ids.add(prop_id) + # We append the newly created properties to property_lookup for prop_id, uprn, landlord_property_id in inserted: if uprn is not None: property_lookup[("uprn", uprn)] = prop_id if landlord_property_id: property_lookup[("landlord_property_id", landlord_property_id)] = prop_id - # We append the newly created 
properties to property_lookup - input_properties, inspections_map, eco_packages = [], {}, {} - for addr in tqdm(addresses): - - # Identity data - uprn = addr.uprn - address1 = addr.address1 - postcode = addr.postcode - full_address = addr.full_address - heating_system = addr.heating_system + for addr, config in tqdm( + zip(addresses, plan_input), + total=len(addresses), + desc="Processing properties", + ): # ---------- 1) filter fetched data ---------- - epc_cache = epc_cache_by_uprn[uprn] + epc_cache = epc_cache_by_uprn[addr.uprn] epc_api_data, epc_page, rrn, = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] # Extract from EPC cache if epc_cache.get("status") == db_funcs.epc_functions.EpcStoreService.FRESH: @@ -732,19 +732,19 @@ async def model_engine(body: PlanTriggerRequest): # Extract associated UPRNs from the database response associated_uprns = db_funcs.address_functions.get_associated_uprns( - postcode_searches.get(postcode.upper()), uprn=uprn + postcode_searches.get(addr.postcode.upper()), uprn=addr.uprn ) - energy_assessment = energy_assessments_by_uprn.get(uprn) + energy_assessment = energy_assessments_by_uprn.get(addr.uprn) epc_searcher = SearchEpc( - address1=address1, - postcode=postcode, - uprn=uprn, + address1=addr.address1, + postcode=addr.postcode, + uprn=addr.uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", - full_address=full_address, - heating_system=heating_system, + full_address=addr.full_address, + heating_system=addr.heating_system, associated_uprns=associated_uprns ) epc_searcher.ordnance_survey_client.built_form = addr.built_form @@ -754,26 +754,19 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) epc_searcher.set_uprn_source(file_format=body.file_format) - # ---------- 2) ensure property exists ---------- - with db_session() as session: - property_id, is_new = db_funcs.property_functions.ensure_property_exists( - 
session, body, epc_searcher, energy_assessment, landlord_property_id=addr.landlord_property_id - ) + lookup_key = ( + ("uprn", addr.uprn) if addr.uprn is not None else ("landlord_property_id", addr.landlord_property_id) + ) + property_id = property_lookup[lookup_key] - if not property_id or (not is_new and not body.multi_plan): + if not property_id: + logger.error("Could not find property ID for address: %s", addr.request_data) + # Should not happen unless input data is inconsistent continue - if is_new: - # TODO: We can probably make these queries in bulk at the front end and use a placeholder - # property ID, and then inject this information afterwards - with db_session() as session: - db_funcs.property_functions.create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + is_new = property_id in new_property_ids + if not is_new and not body.multi_plan: + continue # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. 
# Otherwise, we use the newest EPC @@ -784,7 +777,7 @@ async def model_engine(body: PlanTriggerRequest): ) req_data = extract_property_request_data( - config=config, + address=addr, patches=patches, already_installed=already_installed, non_invasive_recommendations=non_invasive_recommendations, @@ -803,7 +796,7 @@ async def model_engine(body: PlanTriggerRequest): epc_page=epc_page, rrn=rrn, cleaned_address=epc_searcher.address_clean, - config_address=config["address"], + config_address=addr.address, address_postal_town=epc_searcher.address_postal_town ) ) @@ -817,7 +810,7 @@ async def model_engine(body: PlanTriggerRequest): prepared_epc = averages_cleaning(prepared_epc, cleaning_data) # If we have an ECO project, we parse the cavity/solar reasons - eco_packages[property_id] = parse_eco_packages(config, prepared_epc) + eco_packages[property_id] = parse_eco_packages(addr, prepared_epc) # Final step - extract inspections data, if we have it - we inject into property for usage property_inspections = db_funcs.inspections_functions.extract_inspection_data(config) @@ -1332,7 +1325,7 @@ async def model_engine(body: PlanTriggerRequest): scenario_id = body.scenario_id else: with db_session() as session: - engine_scenario = db_funcs.recommendations_functions.create_scenario( + scenario_id = db_funcs.recommendations_functions.create_scenario( session=session, scenario={ "name": body.scenario_name, @@ -1350,7 +1343,6 @@ async def model_engine(body: PlanTriggerRequest): "multi_plan": body.multi_plan } ) - scenario_id = engine_scenario.id for i in tqdm( range(0, len(input_properties), BATCH_SIZE), total=int(np.ceil(len(input_properties) / BATCH_SIZE)) From 9c48bdfbd680f4f9f42a0d56b4a41b9b08d573f1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 21 Dec 2025 11:05:01 +0800 Subject: [PATCH 134/202] implenenting bulk EPC data upload --- backend/app/db/functions/epc_functions.py | 38 ++++++++++++++++++++++- backend/engine/engine.py | 28 ++++++++--------- 2 files changed, 
51 insertions(+), 15 deletions(-) diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py index 27fb9da4..defc24c9 100644 --- a/backend/app/db/functions/epc_functions.py +++ b/backend/app/db/functions/epc_functions.py @@ -1,8 +1,9 @@ from typing import List from datetime import datetime, timedelta, timezone -from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.epc import EpcStore +from sqlmodel import Session +from sqlalchemy.dialects.postgresql import insert class EpcStoreService: @@ -191,3 +192,38 @@ class EpcStoreService: except SQLAlchemyError as e: raise e + + @classmethod + def bulk_upsert_epc_data(cls, session: Session, rows_to_insert: list[dict]): + if not rows_to_insert: + return + + now = datetime.now(timezone.utc) + + values = [ + { + "uprn": row["uprn"], + "epc_api": row["epc_api"], + "epc_api_created_at": now, + "epc_page": row["epc_page"], + "epc_page_rrn": row["epc_page_rrn"], + "epc_page_created_at": now if row["epc_page"] else None, + } + for row in rows_to_insert + ] + + insert_stmt = insert(EpcStore).values(values) + + stmt = insert_stmt.on_conflict_do_update( + index_elements=["uprn"], + set_={ + "epc_api": insert_stmt.excluded.epc_api, + "epc_api_created_at": insert_stmt.excluded.epc_api_created_at, + "epc_page": insert_stmt.excluded.epc_page, + "epc_page_rrn": insert_stmt.excluded.epc_page_rrn, + "epc_page_created_at": insert_stmt.excluded.epc_page_created_at, + }, + ) + + session.execute(stmt) + session.commit() diff --git a/backend/engine/engine.py b/backend/engine/engine.py index c19cf28f..cf147fcd 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -716,7 +716,7 @@ async def model_engine(body: PlanTriggerRequest): if landlord_property_id: property_lookup[("landlord_property_id", landlord_property_id)] = prop_id - input_properties, inspections_map, eco_packages = [], {}, {} + input_properties, inspections_map, eco_packages, 
epc_upserts = [], {}, {}, [] for addr, config in tqdm( zip(addresses, plan_input), total=len(addresses), @@ -838,25 +838,25 @@ async def model_engine(body: PlanTriggerRequest): # 2) A real EPC # 3) A UPRN (meaning that a UPRN could be fetched against that property) # We store this data - # TODO: Upload in bulk - with db_session() as session: - if db_funcs.epc_functions.EpcStoreService.check_insert_needed( - epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn - ): - # We store the EPC data we have found for this property - db_funcs.epc_functions.EpcStoreService.upsert_epc_data( - session=session, - uprn=epc_searcher.uprn, - epc_api=epc_searcher.data, - epc_page=epc_page_source.get("page_source"), - epc_page_rrn=epc_page_source.get("rrn"), - ) + if db_funcs.epc_functions.EpcStoreService.check_insert_needed( + epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn, + ): + epc_upserts.append({ + "uprn": epc_searcher.uprn, + "epc_api": epc_searcher.data, + "epc_page": epc_page_source.get("page_source"), + "epc_page_rrn": epc_page_source.get("rrn"), + }) if not input_properties: return Response(status_code=204) check_duplicate_property_ids(input_properties) + # We now bulk upload all of the EPC data + with db_session() as session: + db_funcs.epc_functions.EpcStoreService.bulk_upsert_epc_data(session, epc_upserts) + # We check if we have inspections data and store it in the database if so. 
We'll update or create # aginst each property if if inspections_map: From 5ed7836fdefdd365924b479f3826eddb9051418c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 21 Dec 2025 11:08:31 +0800 Subject: [PATCH 135/202] fixing bug with missing fuel type --- backend/Property.py | 7 +++++++ backend/engine/engine.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/Property.py b/backend/Property.py index f8013fb5..b9830b93 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1296,6 +1296,13 @@ class Property: else: raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") + # We handle edge case where no heating system is indicated + if self.main_fuel["fuel_type"] in fuel_map: + mapped_fuel = fuel_map[self.main_fuel["fuel_type"]] + self.heating_energy_source = mapped_fuel + self.hot_water_energy_source = mapped_fuel + return + if len(self.heating_energy_source) > 1: # We treat this as a community scheme self.heating_energy_source = ["Varied (Community Scheme)"] diff --git a/backend/engine/engine.py b/backend/engine/engine.py index cf147fcd..9b22feee 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -899,7 +899,7 @@ async def model_engine(body: PlanTriggerRequest): # Insert the spatial data logger.info("Getting spatial data") input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET) - + [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties] logger.info("Performing solar analysis") From 2bc80b4d508db6287b0c3317c162d9ebdb5be717 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Dec 2025 16:09:31 +0800 Subject: [PATCH 136/202] fixing uprn bug --- backend/Property.py | 41 +++------- .../app/db/functions/property_functions.py | 82 ++++++++++++++++++- backend/engine/engine.py | 71 +++++++++++++--- recommendations/WallRecommendations.py | 2 +- 4 files changed, 151 
insertions(+), 45 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index b9830b93..5d4922a3 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -23,6 +23,7 @@ from recommendations.recommendation_utils import ( from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions +from backend.app.db.models.portfolio import rating_lookup ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") DATA_BUCKET = os.environ.get( @@ -828,7 +829,7 @@ class Property: return property_data @classmethod - def _prepare_rating_field(cls, field, rating_lookup): + def _prepare_rating_field(cls, field): """ Utility function for usage in the lambda, for preparing the _rating fields """ @@ -838,7 +839,7 @@ class Property: else None ) - def get_property_details_epc(self, portfolio_id: int, rating_lookup): + def get_property_details_epc(self, portfolio_id: int): if self.current_energy_bill is None: raise ValueError("Current energy bill has not been set") @@ -869,37 +870,21 @@ class Property: "full_address": self.data["address"], "total_floor_area": float(self.data["total-floor-area"]), "walls": self.walls["clean_description"], - "walls_rating": self._prepare_rating_field( - self.data["walls-energy-eff"], rating_lookup - ), + "walls_rating": self._prepare_rating_field(self.data["walls-energy-eff"]), "roof": self.roof["clean_description"], - "roof_rating": self._prepare_rating_field( - self.data["roof-energy-eff"], rating_lookup - ), + "roof_rating": self._prepare_rating_field(self.data["roof-energy-eff"]), "floor": self.floor["clean_description"], - "floor_rating": self._prepare_rating_field( - self.data["floor-energy-eff"], rating_lookup - ), + "floor_rating": self._prepare_rating_field(self.data["floor-energy-eff"]), "windows": self.windows["clean_description"], - "windows_rating": self._prepare_rating_field( - self.data["windows-energy-eff"], rating_lookup - ), + "windows_rating": 
self._prepare_rating_field(self.data["windows-energy-eff"]), "heating": self.main_heating["clean_description"], - "heating_rating": self._prepare_rating_field( - self.data["mainheat-energy-eff"], rating_lookup - ), + "heating_rating": self._prepare_rating_field(self.data["mainheat-energy-eff"]), "heating_controls": self.main_heating_controls["clean_description"], - "heating_controls_rating": self._prepare_rating_field( - self.data["mainheatc-energy-eff"], rating_lookup - ), + "heating_controls_rating": self._prepare_rating_field(self.data["mainheatc-energy-eff"]), "hot_water": self.hotwater["clean_description"], - "hot_water_rating": self._prepare_rating_field( - self.data["hot-water-energy-eff"], rating_lookup - ), + "hot_water_rating": self._prepare_rating_field(self.data["hot-water-energy-eff"]), "lighting": self.lighting["clean_description"], - "lighting_rating": self._prepare_rating_field( - self.data["lighting-energy-eff"], rating_lookup - ), + "lighting_rating": self._prepare_rating_field(self.data["lighting-energy-eff"]), "mainfuel": self.main_fuel["clean_description"], "ventilation": self.ventilation["ventilation"], "solar_pv": self.solar_pv["solar_pv"], @@ -908,9 +893,7 @@ class Property: "floor_height": self.floor_height, "heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor_boolean"], "unheated_corridor_length": self.heat_loss_corridor["length"], - "number_of_open_fireplaces": self.number_of_open_fireplaces[ - "number_of_open_fireplaces" - ], + "number_of_open_fireplaces": self.number_of_open_fireplaces["number_of_open_fireplaces"], "number_of_extensions": self.number_of_extensions["number_of_extensions"], "number_of_storeys": self.number_of_storeys["number_of_storeys"], "mains_gas": self.mains_gas, diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py index 32cd9a25..3bf4a912 100644 --- a/backend/app/db/functions/property_functions.py +++ b/backend/app/db/functions/property_functions.py 
@@ -1,10 +1,9 @@ ### # This script contains methods for interacting with the property table in the database ### -from typing import List import datetime import pytz -from sqlalchemy import select, or_ +from sqlalchemy import select, or_, bindparam, update from sqlalchemy.orm import Session from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.dialects.postgresql import insert @@ -272,7 +271,8 @@ def bulk_create_properties( insert(PropertyModel) .values(rows) .on_conflict_do_nothing( - index_elements=["portfolio_id", "uprn"] + index_elements=["portfolio_id", "uprn"], + index_where=PropertyModel.uprn.isnot(None), ) .returning( PropertyModel.id, @@ -285,3 +285,79 @@ def bulk_create_properties( session.flush() return result.fetchall() + + +def bulk_update_properties(session: Session, property_updates: list[dict]): + if not property_updates: + return + + now = datetime.now(pytz.utc) + + stmt = ( + update(PropertyModel) + .where( + PropertyModel.id == bindparam("property_id"), + PropertyModel.portfolio_id == bindparam("portfolio_id"), + ) + .values( + **{k: bindparam(k) for k in property_updates[0]["data"].keys()}, + updated_at=now, + ) + ) + + payload = [] + for row in property_updates: + payload.append({ + "property_id": row["property_id"], + "portfolio_id": row["portfolio_id"], + **row["data"], + }) + + session.execute(stmt, payload) + + +def bulk_upsert_property_details_epc(session: Session, rows: list[dict]): + if not rows: + return + + insert_stmt = insert(PropertyDetailsEpcModel).values(rows) + + update_cols = { + col.name: insert_stmt.excluded[col.name] + for col in PropertyDetailsEpcModel.__table__.columns + if col.name not in ("id",) + } + + stmt = insert_stmt.on_conflict_do_update( + index_elements=["portfolio_id", "property_id"], + set_=update_cols, + ) + + session.execute(stmt) + + +def bulk_upsert_property_spatial(session: Session, rows: list[dict]): + if not rows: + return + + values = [] + for row in rows: + values.append({ + "uprn": row["uprn"], + 
**row["data"], + }) + + insert_stmt = insert(PropertyDetailsSpatial).values(values) + + update_cols = { + col.name: insert_stmt.excluded[col.name] + for col in PropertyDetailsSpatial.__table__.columns + if col.name not in ("id", "uprn") + } + + stmt = insert_stmt.on_conflict_do_update( + index_elements=["uprn"], + set_=update_cols, + ) + + session.execute(stmt) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 9b22feee..27020607 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -23,7 +23,6 @@ from backend.app.db.connection import db_engine import backend.app.db.functions as db_funcs from backend.app.db.functions.tasks.Tasks import SubTaskInterface -from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES from backend.app.plan.utils import ( get_cleaned, patch_epc, extract_property_request_data, parse_eco_packages, handle_error, build_cloudwatch_log_url @@ -702,6 +701,7 @@ async def model_engine(body: PlanTriggerRequest): # If we have properties that need to be created, we cerate them in bulk new_property_ids = set() if to_create: + logger.info("Creating %d new properties", len(to_create)) with db_session() as session: inserted = db_funcs.property_functions.bulk_create_properties( session, body, to_create, energy_assessments_by_uprn @@ -722,7 +722,6 @@ async def model_engine(body: PlanTriggerRequest): total=len(addresses), desc="Processing properties", ): - # ---------- 1) filter fetched data ---------- epc_cache = epc_cache_by_uprn[addr.uprn] epc_api_data, epc_page, rrn, = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] @@ -838,11 +837,12 @@ async def model_engine(body: PlanTriggerRequest): # 2) A real EPC # 3) A UPRN (meaning that a UPRN could be fetched against that property) # We store this data + uprn_to_check_against = addr.uprn if addr.uprn is not None else epc_searcher.uprn # Until we 
enforce uprn if db_funcs.epc_functions.EpcStoreService.check_insert_needed( - epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn, + epc_cache, epc_searcher.newest_epc.get("estimated"), uprn_to_check_against, ): epc_upserts.append({ - "uprn": epc_searcher.uprn, + "uprn": uprn_to_check_against, "epc_api": epc_searcher.data, "epc_page": epc_page_source.get("page_source"), "epc_page_rrn": epc_page_source.get("rrn"), @@ -853,16 +853,15 @@ async def model_engine(body: PlanTriggerRequest): check_duplicate_property_ids(input_properties) + logger.info("Inserting property data") # We now bulk upload all of the EPC data with db_session() as session: db_funcs.epc_functions.EpcStoreService.bulk_upsert_epc_data(session, epc_upserts) # We check if we have inspections data and store it in the database if so. We'll update or create # aginst each property if - if inspections_map: - logger.info("Inserting inspections data") - with db_session() as session: - db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) + with db_session() as session: + db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) # Set up model api and warm up the lambdas model_api = ModelApi( @@ -899,7 +898,7 @@ async def model_engine(body: PlanTriggerRequest): # Insert the spatial data logger.info("Getting spatial data") input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET) - + [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties] logger.info("Performing solar analysis") @@ -1344,6 +1343,56 @@ async def model_engine(body: PlanTriggerRequest): } ) + # TODO: New + property_updates, property_epc_details, property_spatial_updates = [], [], [] + # plans_to_create = [{property_id, plan_data}] + # recommendations_to_create = [{plan_ref, recommendation_data}] + # funding_to_create = [{plan_ref, funding_data}] + plans_to_create, 
recommendations_to_create, funding_to_create = [], [], [] + + # Prepare the data that will need to be uploaded in bulk + for p in input_properties: + recommendations_for_property = recommendations.get(p.id, []) + default_recommendations = [r for r in recommendations_for_property if r["default"]] + total_sap_points = sum([r["sap_points"] for r in default_recommendations]) + new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points + new_epc = sap_to_epc(new_sap_points) + total_cost = sum([r["total"] for r in default_recommendations]) + valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc, total_cost=total_cost) + + # --- property-level updates (always) --- + property_updates.append({ + "property_id": p.id, + "portfolio_id": body.portfolio_id, + "data": p.get_full_property_data(current_valuation=valuations["current_value"]) + }) + + property_epc_details.append(p.get_property_details_epc(portfolio_id=body.portfolio_id)) + + property_spatial_updates.append({"uprn": p.uprn, "data": p.spatial}) + + # --- skip plan creation if no recommendations --- + if not recommendations_for_property: + continue + + plan_data = db_funcs.recommendations_functions.prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations + ) + plans_to_create.append({"property_id": p.id, "plan_data": plan_data}) + + # store recommendations keyed by property + for r in recommendations_for_property: + recommendations_to_create.append({"property_id": p.id, "data": r}) + + # Bulk upload property data + logger.info("Uploading property data in bulk") + with db_session() as session: + db_funcs.property_functions.bulk_update_properties(session, property_updates) + db_funcs.property_functions.bulk_upsert_property_details_epc(session, property_epc_details) + db_funcs.property_functions.bulk_upsert_property_spatial(session, property_spatial_updates) + + # TODO: End New + for i in tqdm( range(0, 
len(input_properties), BATCH_SIZE), total=int(np.ceil(len(input_properties) / BATCH_SIZE)) ): @@ -1369,9 +1418,7 @@ async def model_engine(body: PlanTriggerRequest): default_recommendations ) - property_details_epc = p.get_property_details_epc( - portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, - ) + property_details_epc = p.get_property_details_epc(portfolio_id=body.portfolio_id) property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) db_funcs.property_functions.create_property_details_epc(session, property_details_epc) diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index c54c00d9..328f1ab8 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -168,7 +168,7 @@ class WallRecommendations(Definitions): ): return - if u_value: + if u_value is not None: if self.property.walls["thermal_transmittance_unit"] != self.U_VALUE_UNIT: raise NotImplementedError( From c503da05a7c7472d4583eb866740d94f6a031347 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Dec 2025 17:10:39 +0800 Subject: [PATCH 137/202] debugging property duplicated uprns --- backend/Property.py | 3 +- backend/SearchEpc.py | 35 ++++++++++++------- .../app/db/functions/property_functions.py | 27 ++++++++------ backend/engine/engine.py | 10 +++++- 4 files changed, 49 insertions(+), 26 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 5d4922a3..10af56cc 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -81,6 +81,7 @@ class Property: postcode, address, epc_record, + uprn=None, # Pass as an optional input property_valuation=None, already_installed=None, non_invasive_recommendations=None, @@ -121,7 +122,7 @@ class Property: self.valuation = property_valuation - self.uprn = epc_record.get("uprn") + self.uprn = uprn if uprn is not None else epc_record.get("uprn") self.uprn_source = self.data.get("uprn-source") self.full_sap_epc = 
epc_record.get("full_sap_epc") diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index cb465239..9af7330b 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -553,22 +553,31 @@ class SearchEpc: else: raise ValueError("Multiple UPRNs found - investigate me") - if uprns: - uprn = uprns.pop() - # Convert to int - if not pd.isnull(uprn): - uprn = int(uprn) - else: - newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED - uprn = hash(self.address1 + self.postcode) + # if uprns: + # epc_uprn = uprns.pop() + # # Convert to int + # if not pd.isnull(epc_uprn): + # uprn = int(epc_uprn) + # else: + # newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED + # uprn = hash(self.address1 + self.postcode) + + if self.uprn is not None and uprns: + epc_uprn = uprns.pop() + if int(epc_uprn) != self.uprn: + logger.warning( + f"Provided UPRN {self.uprn} does not match EPC UPRN {epc_uprn}, using provided UPRN" + ) + # We overwrite but in this instance, we've likely got the wrong EPC data + newest_epc["uprn"] = self.uprn if self.fast: - return newest_epc, [], {}, "", "", None, "" + return newest_epc, [], {}, "", "", "" # Retrieve postcode and address address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc) - return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn, address_postal_town + return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, address_postal_town @staticmethod def filter_newest_epc(list_of_epcs: List): @@ -923,7 +932,7 @@ class SearchEpc: @staticmethod def calculate_weighted_lodgement_datetime(epc_data): - numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64') + numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).astype('int64') # Calculate the weighted sum of dates weighted_sum = (numeric_dates * epc_data['weight']).sum() @@ -991,7 +1000,7 @@ class SearchEpc: if response["status"] == 200: ( - self.newest_epc, self.older_epcs, self.full_sap_epc, 
self.address_clean, self.postcode_clean, self.uprn, + self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.address_postal_town ) = self.extract_epc_data(address=self.full_address) @@ -1085,7 +1094,7 @@ class SearchEpc: response = self.get_epc() if response["status"] == 200: ( - self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn, + self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.address_postal_town ) = self.extract_epc_data() return diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py index 3bf4a912..99cc8ed7 100644 --- a/backend/app/db/functions/property_functions.py +++ b/backend/app/db/functions/property_functions.py @@ -291,13 +291,13 @@ def bulk_update_properties(session: Session, property_updates: list[dict]): if not property_updates: return - now = datetime.now(pytz.utc) + now = datetime.datetime.now(pytz.utc) stmt = ( - update(PropertyModel) + update(PropertyModel.__table__) .where( - PropertyModel.id == bindparam("property_id"), - PropertyModel.portfolio_id == bindparam("portfolio_id"), + PropertyModel.id == bindparam("b_id"), + PropertyModel.portfolio_id == bindparam("b_portfolio_id"), ) .values( **{k: bindparam(k) for k in property_updates[0]["data"].keys()}, @@ -305,15 +305,20 @@ def bulk_update_properties(session: Session, property_updates: list[dict]): ) ) - payload = [] - for row in property_updates: - payload.append({ - "property_id": row["property_id"], - "portfolio_id": row["portfolio_id"], + payload = [ + { + "b_id": row["property_id"], # renamed bind param + "b_portfolio_id": row["portfolio_id"], **row["data"], - }) + } + for row in property_updates + ] - session.execute(stmt, payload) + session.execute( + stmt, + payload, + execution_options={"synchronize_session": False}, + ) def bulk_upsert_property_details_epc(session: Session, rows: 
list[dict]): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 27020607..5aa82dcd 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -408,6 +408,13 @@ def check_duplicate_property_ids(input_properties): # de-dupe input_uprns raise ValueError(f"Duplicate property IDs in the input data: {duplicates}") + # Check for dupe UPRNS + input_uprns = [x.uprn for x in input_properties if x.uprn is not None] + if input_uprns: + if len(input_uprns) != len(set(input_uprns)): + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + raise ValueError(f"Duplicate UPRNs in the input properties: {duplicates}") + return True @@ -781,7 +788,7 @@ async def model_engine(body: PlanTriggerRequest): already_installed=already_installed, non_invasive_recommendations=non_invasive_recommendations, valuation_data=valuation_data, - uprn=epc_searcher.uprn, + uprn=addr.uprn, ) # Pull this out as it may get overwritten property_non_invasive_recommendations, patch = req_data.non_invasive_recommendations, req_data.patch @@ -819,6 +826,7 @@ async def model_engine(body: PlanTriggerRequest): input_properties.append( Property( id=property_id, + uprn=addr.uprn, is_new=is_new, address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, From f06676f15a0945ee5b56bf625a9b4d621b901194 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Dec 2025 18:46:19 +0800 Subject: [PATCH 138/202] completed database upload refactor --- backend/app/db/functions/funding_functions.py | 70 +++++++++ .../db/functions/recommendations_functions.py | 112 ++++++++++++++ backend/engine/engine.py | 142 ++++++++++-------- 3 files changed, 262 insertions(+), 62 deletions(-) diff --git a/backend/app/db/functions/funding_functions.py b/backend/app/db/functions/funding_functions.py index 51dffa21..df36d308 100644 --- a/backend/app/db/functions/funding_functions.py +++ b/backend/app/db/functions/funding_functions.py @@ -1,5 +1,6 @@ from sqlalchemy.orm 
import Session from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy import insert from backend.app.db.models.funding import FundingPackage, FundingPackageMeasures @@ -69,3 +70,72 @@ def upload_funding(session: Session, p, plan_id, recommendations_to_upload): session.rollback() print(f"An error occurred: {e}") return False + + +def bulk_upload_funding_packages( + session: Session, + funding_payload: list[dict], +): + """ + Bulk upload: + - funding_package + - funding_package_measures + + Assumes caller manages the transaction. + """ + + if not funding_payload: + return + + # --------------------------------------------------------- + # 1. Prepare funding package rows + # --------------------------------------------------------- + funding_rows = [] + measures_by_index = [] + + for f in funding_payload: + funding_rows.append({ + "plan_id": f["plan_id"], + "scheme": f["scheme"], + "project_funding": f["project_funding"], + "total_uplift": f["total_uplift"], + "full_project_score": f["full_project_score"], + "partial_project_score": f["partial_project_score"], + "uplift_project_score": f["uplift_project_score"], + }) + + measures_by_index.append(f.get("measures", [])) + + # --------------------------------------------------------- + # 2. Insert funding packages and get IDs + # --------------------------------------------------------- + result = session.execute( + insert(FundingPackage) + .values(funding_rows) + .returning(FundingPackage.id) + ) + + funding_package_ids = [row[0] for row in result] + + # --------------------------------------------------------- + # 3. 
Insert funding package measures + # --------------------------------------------------------- + measures_rows = [] + + for funding_package_id, measures in zip( + funding_package_ids, measures_by_index + ): + for m in measures: + measures_rows.append({ + "funding_package_id": funding_package_id, + "measure": m["measure"], + "material_id": m["material_id"], + "innovation_uplift": m["innovation_uplift"], + "partial_project_score": m["partial_project_score"], + "uplift_project_score": m["uplift_project_score"], + }) + + if measures_rows: + session.execute( + insert(FundingPackageMeasures).values(measures_rows) + ) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index b70111da..5b39f86e 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -96,6 +96,30 @@ def create_plan(session: Session, plan): raise e +def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int, int]: + if not plans_to_create: + return {} + + payload = [ + { + "property_id": p["property_id"], + **p["plan_data"], + } + for p in plans_to_create + ] + + stmt = ( + insert(Plan) + .values(payload) + .returning(Plan.id, Plan.property_id) + ) + + result = session.execute(stmt).all() + + # property_id -> plan_id + return {row.property_id: row.id for row in result} + + def create_scenario(session: Session, scenario: dict) -> int: existing_scenario = ( session.query(Scenario) @@ -233,6 +257,94 @@ def upload_recommendations(session: Session, recommendations_to_upload, property return False +def bulk_upload_recommendations_and_materials( + session: Session, + recommendation_payload: list[dict], +): + if not recommendation_payload: + return + + # --------------------------------------------------------- + # 1. 
Prepare recommendation rows + # --------------------------------------------------------- + recommendation_rows = [] + parts_by_index = [] + plan_ids_by_index = [] + + for rec in recommendation_payload: + recommendation_rows.append({ + "property_id": rec["property_id"], + "type": rec["type"], + "measure_type": rec["measure_type"], + "description": rec["description"], + "estimated_cost": rec["estimated_cost"], + "default": rec["default"], + "starting_u_value": rec["starting_u_value"], + "new_u_value": rec["new_u_value"], + "sap_points": rec["sap_points"], + "heat_demand": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], + "co2_equivalent_savings": rec["co2_equivalent_savings"], + "energy_savings": rec["energy_savings"], + "energy_cost_savings": rec["energy_cost_savings"], + "total_work_hours": rec["total_work_hours"], + "labour_days": rec["labour_days"], + "already_installed": rec["already_installed"], + }) + + parts_by_index.append(rec["parts"]) + plan_ids_by_index.append(rec["plan_id"]) + + # --------------------------------------------------------- + # 2. Insert recommendations and get IDs + # --------------------------------------------------------- + result = session.execute( + insert(Recommendation) + .values(recommendation_rows) + .returning(Recommendation.id) + ) + + recommendation_ids = [row[0] for row in result] + + # --------------------------------------------------------- + # 3. 
Insert recommendation materials + # --------------------------------------------------------- + materials_rows = [] + + for recommendation_id, parts in zip(recommendation_ids, parts_by_index): + for part in parts: + materials_rows.append({ + "recommendation_id": recommendation_id, + "material_id": part["material_id"], + "depth": part["depth"], + "quantity": part["quantity"], + "quantity_unit": part["quantity_unit"], + "estimated_cost": part["estimated_cost"], + }) + + if materials_rows: + session.execute( + insert(RecommendationMaterials).values(materials_rows) + ) + + # --------------------------------------------------------- + # 4. Insert plan ↔ recommendation links + # --------------------------------------------------------- + plan_recommendation_rows = [ + { + "plan_id": plan_id, + "recommendation_id": recommendation_id, + } + for plan_id, recommendation_id in zip( + plan_ids_by_index, recommendation_ids + ) + ] + + session.execute( + insert(PlanRecommendations).values(plan_recommendation_rows) + ) + + def chunked(iterable, size=100): for i in range(0, len(iterable), size): yield iterable[i:i + size] diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 5aa82dcd..276444e1 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1351,11 +1351,7 @@ async def model_engine(body: PlanTriggerRequest): } ) - # TODO: New property_updates, property_epc_details, property_spatial_updates = [], [], [] - # plans_to_create = [{property_id, plan_data}] - # recommendations_to_create = [{plan_ref, recommendation_data}] - # funding_to_create = [{plan_ref, funding_data}] plans_to_create, recommendations_to_create, funding_to_create = [], [], [] # Prepare the data that will need to be uploaded in bulk @@ -1390,7 +1386,69 @@ async def model_engine(body: PlanTriggerRequest): # store recommendations keyed by property for r in recommendations_for_property: - recommendations_to_create.append({"property_id": p.id, "data": r}) + 
recommendations_to_create.append({ + "property_id": p.id, + # ---- Recommendation core ---- + "type": r["type"], + "measure_type": r["measure_type"], + "description": r["description"], + "estimated_cost": float(r["total"]), + "default": r["default"], + "starting_u_value": float(r["starting_u_value"]) if r.get("starting_u_value") else None, + "new_u_value": float(r["new_u_value"]) if r.get("new_u_value") else None, + "sap_points": float(r["sap_points"]), + "energy_savings": float(r["heat_demand"]), + "kwh_savings": float(r["kwh_savings"]), + "co2_equivalent_savings": float(r["co2_equivalent_savings"]), + "total_work_hours": float(r["labour_hours"]), + "energy_cost_savings": float(r["energy_cost_savings"]), + "labour_days": float(r["labour_days"]), + "already_installed": r["already_installed"], + "heat_demand": float(r["heat_demand"]), + + # ---- parts ---- + "parts": [ + { + "material_id": part["id"], + "depth": int(part["depth"]) if part.get("depth") else None, + "quantity": float(part["quantity"]) if part.get("quantity") else None, + "quantity_unit": part.get("quantity_unit"), + "estimated_cost": float(part.get("total", part.get("total_cost"))), + } + for part in r.get("parts", []) + ], + }) + + if recommendations_for_property and p.funded_measures: + funding_to_create.append({ + "property_id": p.id, + "scheme": p.scheme, + "project_funding": float(p.project_funding), + "total_uplift": float(p.total_uplift), + "full_project_score": float(p.full_project_score), + "partial_project_score": float(p.partial_project_score), + "uplift_project_score": float(p.uplift_project_score), + "measures": [ + { + "measure": ( + "cavity_wall_insulation" + if part["type"] == "extension_cavity_wall_insulation" + else "sealing_fireplace" + if part["type"] == "sealing_open_fireplace" + else part["type"] + ), + "material_id": ( + part["parts"][0]["id"] + if part.get("parts") + else None + ), + "innovation_uplift": float(part["innovation_uplift"]), + "partial_project_score": 
float(part["partial_project_score"]), + "uplift_project_score": float(part["uplift_project_score"]), + } + for part in p.funded_measures + ], + }) # Bulk upload property data logger.info("Uploading property data in bulk") @@ -1398,65 +1456,25 @@ async def model_engine(body: PlanTriggerRequest): db_funcs.property_functions.bulk_update_properties(session, property_updates) db_funcs.property_functions.bulk_upsert_property_details_epc(session, property_epc_details) db_funcs.property_functions.bulk_upsert_property_spatial(session, property_spatial_updates) + # # Bulk create plans + plan_id_by_property = db_funcs.recommendations_functions.bulk_create_plans(session, plans_to_create) + recommendation_payload = [ + { + "plan_id": plan_id_by_property[r["property_id"]], + **{k: v for k, v in r.items() if k not in ["parts"]}, + "parts": r["parts"], + } for r in recommendations_to_create if r["property_id"] in plan_id_by_property + ] - # TODO: End New + db_funcs.recommendations_functions.bulk_upload_recommendations_and_materials( + session, recommendation_payload + ) - for i in tqdm( - range(0, len(input_properties), BATCH_SIZE), total=int(np.ceil(len(input_properties) / BATCH_SIZE)) - ): - try: - # Take a slice of the input_properties list to make a batch - batch_properties = input_properties[i:i + BATCH_SIZE] - with db_session() as session: - for p in batch_properties: - recommendations_to_upload = recommendations.get(p.id, []) - default_recommendations = [r for r in recommendations_to_upload if r["default"]] - total_sap_points = sum([r["sap_points"] for r in default_recommendations]) - new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points - new_epc = sap_to_epc(new_sap_points) - - total_cost = sum([r["total"] for r in default_recommendations]) - - valuations = PropertyValuation.estimate( - property_instance=p, target_epc=new_epc, total_cost=total_cost - ) - - property_plan_data = db_funcs.recommendations_functions.prepare_plan_data( - p, body, 
scenario_id, eco_packages, valuations, new_sap_points, new_epc, - default_recommendations - ) - - property_details_epc = p.get_property_details_epc(portfolio_id=body.portfolio_id) - property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - db_funcs.property_functions.create_property_details_epc(session, property_details_epc) - - db_funcs.property_functions.update_or_create_property_spatial_details( - session, p.uprn, p.spatial - ) - - db_funcs.property_functions.update_property_data( - session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data - ) - - if not recommendations_to_upload: - continue - - new_plan_id = db_funcs.recommendations_functions.create_plan( - session, plan=property_plan_data - ) - - db_funcs.recommendations_functions.upload_recommendations( - session, recommendations_to_upload, p.id, new_plan_id - ) - db_funcs.funding_functions.upload_funding( - session, p, new_plan_id, recommendations_to_upload - ) - - except Exception as e: - # Rollback the session if an error occurs - logger.warning("Failed i = %s" % str(i)) - logger.error(f"An error occurred during batch starting at index {i}: {e}") - logger.error(f"property is uprn {p.uprn} id {p.id} address {p.address}") + funding_payload = [ + {"plan_id": plan_id_by_property[f["property_id"]], **{k: v for k, v in f.items() if k != "property_id"}} + for f in funding_to_create if f["property_id"] in plan_id_by_property + ] + db_funcs.funding_functions.bulk_upload_funding_packages(session, funding_payload) logger.info("Work completed, updating log status") From b881c8358eb125c42b5b036827bc81fc940c5322 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 23 Dec 2025 09:53:03 +0800 Subject: [PATCH 139/202] new solid floor pricing mechanism --- recommendations/Costs.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 1184d5ed..b005ab69 
100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -350,16 +350,31 @@ class Costs: total_cost = material["total_cost"] * insulation_floor_area - labour_hours = material["labour_hours_per_unit"] * insulation_floor_area - # To install suspended floor insulation, a small to medium size project might be conducted by a team of 3 - # people - labour_days = (labour_hours / 8) / 3 + # We assume the average house takes ~7 days to complete at £300/day incl. VAT, as per checkatrade + # which can be seen here: https://www.checkatrade.com/blog/cost-guides/floor-insulation-cost + # Assumptions + base_days = 7 # The quickest it will be completed + base_area = 45 # The area that can be completed in that time (for a typical 90m2 house) + labour_exponent = 0.85 # Non-linear scaling + daily_labour_rate = 300 # Based on checkatrade + + min_days = 3 # Fewest days it will take + labour_days = max( + min_days, + base_days * (insulation_floor_area / base_area) ** labour_exponent + ) + + labour_cost = labour_days * daily_labour_rate + + total_cost = total_cost + labour_cost + + total_cost = round(total_cost, 2) return { "total": total_cost, "contingency": self.CONTINGENCIES["solid_floor_insulation"] * total_cost, "contingency_rate": self.CONTINGENCIES["solid_floor_insulation"], - "labour_hours": labour_hours, + "labour_hours": labour_days * 8, "labour_days": labour_days, } From aa515a97975401c0caff1adf4ca53673ad7ac3f8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 24 Dec 2025 12:58:24 +0800 Subject: [PATCH 140/202] added new pricing for air source heat pump --- recommendations/Costs.py | 154 +++++++++++++------------- recommendations/HeatingRecommender.py | 6 +- 2 files changed, 79 insertions(+), 81 deletions(-) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index b005ab69..86062433 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -1,6 +1,7 @@ import numpy as np from recommendations.county_to_region import 
county_to_region_map from utils.logger import setup_logger +from backend.ml_models.AnnualBillSavings import AnnualBillSavings logger = setup_logger() @@ -21,25 +22,6 @@ regional_labour_variations = [ {"Region": "Northern Ireland", "Adjustment_Factor": 0.76} ] -# This data is based on the MCS database - taken the figures for June 2024 -MCS_SOLAR_PV_COST_DATA = { - "last_updated": "2024-07-10", - "average_cost_per_kwh": 1825, - "average_cost_per_kwh-Outer London": 1950, - "average_cost_per_kwh-Inner London": 1950, - "average_cost_per_kwh-South East England": 1966, - "average_cost_per_kwh-South West England": 1864, - "average_cost_per_kwh-East of England": 1719, - "average_cost_per_kwh-East Midlands": 1730, - "average_cost_per_kwh-West Midlands": 1789, - "average_cost_per_kwh-North East England": 1872, - "average_cost_per_kwh-North West England": 1860, - "average_cost_per_kwh-Yorkshire and the Humber": 1789, - "average_cost_per_kwh-Wales": 1676, - "average_cost_per_kwh-Scotland": 1781, - "average_cost_per_kwh-Northern Ireland": 1347, -} - # Installers are now working with 435 watt panels PANEL_SIZE = 0.435 @@ -61,47 +43,40 @@ INSTALLER_SOLAR_COSTS = [ {'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'} ] +# These are costs we received from CRG, for pricing up air source heat pumps +# These are costs that we have been provided from CRG specifically for air source heat pumps +ASHP_SMALL_SYSTEM_COST = 8812.92 # 4.8 to 8.5, based on their pricing +ASHP_LARGE_SYSTEM_COST = 11053.25 +ASHP_SECURITY = 455.00 +ASHP_WALL_BRACKET = 574.17 +ASHP_DISTRIBUTION_SYSTEM_COSTS = [ + {"n_radiators": 4, "cost": 3380.00}, + {"n_radiators": 5, "cost": 3607.50}, + {"n_radiators": 6, "cost": 4116.67}, + {"n_radiators": 7, "cost": 4647.50}, + {"n_radiators": 8, "cost": 5200.00}, + {"n_radiators": 9, "cost": 5730.83}, + {"n_radiators": 10, "cost": 6283.33}, + {"n_radiators": 11, "cost": 6857.50}, + {"n_radiators": 12, "cost": 7431.67}, + {"n_radiators": 13, 
"cost": 8016.67}, + {"n_radiators": 14, "cost": 8612.50}, + {"n_radiators": 15, "cost": 9219.17}, + {"n_radiators": 16, "cost": 9804.17}, + {"n_radiators": 17, "cost": 10389.17}, +] +ASHP_CYLINDER_COSTS = [ + {"capacity_l": 120, "cost": 3318.25}, + {"capacity_l": 180, "cost": 3480.75}, + {"capacity_l": 200, "cost": 3853.42}, + {"capacity_l": 250, "cost": 3961.75}, +] + # CEG uses use Solshare as an inverter to provide solar PV to multiple flats. This costs £7500 for the inverter alone # https://midsummerwholesale.co.uk/buy/solshare INSTALLER_SOLAR_PV_INVERTER_COST = 7500 INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500 # Just a rough guess to labour costs -# INSTALLER_SCAFFOLDING_COSTS = [ -# {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'}, -# {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'}, -# {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'} -# ] - -# This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, -# to be conservative -MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = { - "Outer London": 13220, - "Inner London": 13220, - "South East England": 13547, - "South West England": 12776, - "East of England": 12585, - "East Midlands": 12239, - "West Midlands": 13182, - "North East England": 11829, - "North West England": 11714, - "Yorkshire and the Humber": 11919, - "Wales": 13701, - "Scotland": 12586, - "Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland -} - -INSTALLER_ASHP_COSTS = [ - {'capacity_kw': 5.0, 'brand': 'Mitsubishi', 'tank_size_liters': 150, 'cost': 10149.53, 'installer': 'CEG'}, - {'capacity_kw': 6.0, 'brand': 'Mitsubishi', 'tank_size_liters': 170, 'cost': 10823.48, 'installer': 'CEG'}, - {'capacity_kw': 8.5, 'brand': 'Mitsubishi', 'tank_size_liters': 200, 'cost': 11312.43, 'installer': 'CEG'}, - {'capacity_kw': 11.2, 'brand': 'Mitsubishi', 
'tank_size_liters': 250, 'cost': 12156.75, 'installer': 'CEG'}, - {'capacity_kw': 14.0, 'brand': 'Mitsubishi', 'tank_size_liters': 300, 'cost': 14405.54, 'installer': 'CEG'}, - {'capacity_kw': 14.0, 'brand': 'Mitsubishi', 'tank_size_liters': 300, 'cost': 14405.54, 'installer': 'CEG'}, - {'capacity_kw': 17.0, 'brand': 'Grant', 'tank_size_liters': 300, 'cost': 14445.00, 'installer': 'CEG'}, - {'capacity_kw': 20.0, 'brand': 'Ecoforest', 'tank_size_liters': 400, 'cost': 21189.41, 'installer': 'CEG'}, - {'capacity_kw': None, 'brand': '2 x cascaded ASHPs', 'tank_size_liters': 500, 'cost': 22950.00, 'installer': 'CEG'} -] - INSTALLER_SOLAR_BATTERY_COSTS = [ {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'}, # {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'}, @@ -368,7 +343,7 @@ class Costs: total_cost = total_cost + labour_cost - total_cost = round(total_cost, 2) + total_cost = round(total_cost) return { "total": total_cost, @@ -853,32 +828,55 @@ class Costs: "labour_days": labour_days, } - def air_source_heat_pump(self, ashp_size): - """ - Based on the region and type of property, this function will produce a cost estimation for an air source heat - pump. 
This cost will include the boiler upgrade scheme grant - - """ - # This is the average cost of a project, we'll add some additional contingency - - if ashp_size is None: - cost = [x for x in INSTALLER_ASHP_COSTS if x["capacity_kw"] is None][0]["cost"] + @staticmethod + def _select_cylinder_capacity(occupants: float): + if occupants <= 2: + return 120 + elif occupants <= 3: + return 180 + elif occupants <= 4: + return 200 else: - cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"] + return 250 - # The costs from installers exclude VAT - vat = cost * self.VAT_RATE - cost = cost + vat + def air_source_heat_pump(self, ashp_size: float, number_heated_rooms: int, total_floor_area: float) -> dict: + """ + We produce a cost estimation for an air source heat pump, based on costs we have received from installers. - # We assume 5 days installation - labour_days = 5 - labour_hours = labour_days * 8 + """ + + system_cost = ( + (ASHP_SMALL_SYSTEM_COST if ashp_size <= 8.5 else ASHP_LARGE_SYSTEM_COST) + ASHP_SECURITY + ASHP_WALL_BRACKET + ) + + available_n_rads = [x["n_radiators"] for x in ASHP_DISTRIBUTION_SYSTEM_COSTS] + if number_heated_rooms < min(available_n_rads): + # We use the smallest value + rads_to_use = min(available_n_rads) + elif number_heated_rooms > max(available_n_rads): + # We use the largest value + rads_to_use = max(available_n_rads) + else: + rads_to_use = int(number_heated_rooms) + + distribution_system_cost = [ + x for x in ASHP_DISTRIBUTION_SYSTEM_COSTS if x["n_radiators"] == rads_to_use + ][0]["cost"] + + # Cylinder cost + est_n_occupants = AnnualBillSavings.calculate_occupants(total_floor_area) + cylinder_capacity = self._select_cylinder_capacity(est_n_occupants) + cylinder_cost = [ + x for x in ASHP_CYLINDER_COSTS if x["capacity_l"] == cylinder_capacity + ][0]["cost"] + + total = system_cost + distribution_system_cost + cylinder_cost return { - "total": cost, - "contingency": cost * self.CONTINGENCIES["air_source_heat_pump"], + "total": total, + 
"contingency": total * self.CONTINGENCIES["air_source_heat_pump"], "contingency_rate": self.CONTINGENCIES["air_source_heat_pump"], - "vat": vat, - "labour_hours": labour_hours, - "labour_days": labour_days, + "vat": 0, + "labour_hours": 80, + "labour_days": 10, } diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index c5aa8b38..409f9ec6 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -526,14 +526,14 @@ class HeatingRecommender: # 1) Best available path: HLP → direct peak if heat_loss_parameter_W_per_m2K is not None: peak_kw = heat_loss_parameter_W_per_m2K * floor_area_m2 * ΔT / 1000.0 - return (peak_kw, peak_kw) # no range needed + return peak_kw, peak_kw # no range needed # 2) Second-best: space-heating demand → HDD method if space_heat_kwh_per_m2_yr is not None: annual_space_kwh = space_heat_kwh_per_m2_yr * floor_area_m2 Htot = annual_space_kwh * 1000.0 / (hdd_base_dd * 24.0) # W/K peak_kw = Htot * ΔT / 1000.0 - return (peak_kw, peak_kw) + return peak_kw, peak_kw # 3) Minimal inputs: primary energy + assumed fraction → range assert epc_primary_kwh_per_m2_yr is not None @@ -547,7 +547,7 @@ class HeatingRecommender: low = to_peak(space_heat_fraction_range[0]) high = to_peak(space_heat_fraction_range[1]) - return (low, high) + return low, high @staticmethod def pick_model(peak_kw_range, models_kw=(5, 6, 8.5, 11.2, 14, 17, 20)): From ff5bc2f834ea8fb56be4214997b402434eeab150 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 28 Dec 2025 16:26:21 +0800 Subject: [PATCH 141/202] adding ventilation handling to prevent extremem values --- backend/engine/engine.py | 8 +++--- recommendations/Recommendations.py | 41 ++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 276444e1..58203da7 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -980,12 +980,12 @@ 
async def model_engine(body: PlanTriggerRequest): logger.info("Preparing data for scoring in sap change api") recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + # Temp putting this here + recommendations_scoring_data["is_post_sap10_ending"] = True recommendations_scoring_data = recommendations_scoring_data.drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] ) all_predictions = await model_api.async_paginated_predictions( diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index f984acc3..0b3d1635 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -615,7 +615,7 @@ class Recommendations: if metric == "sap": property_phase_impact[metric] = round(property_phase_impact[metric], 2) else: - # We prevent these from being positive + # We prevent mechanical ventilation from being positive property_phase_impact[metric] = ( 0 if property_phase_impact[metric] > 0 else property_phase_impact[metric] ) @@ -632,6 +632,38 @@ class Recommendations: property_phase_impact["carbon"], rec["co2_equivalent_savings"] ) + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] + current_phase_values["carbon"] = previous_phase_values["carbon"] - property_phase_impact["carbon"] + + # We also ensure that mechanical ventilation doesn't have an ovely strong negative SAP impact + if rec["type"] == "mechanical_ventilation": + # ventilation is capped by having no greater and a -4 impact + ventilation_sap_limit = -4 + + def _check_veniltation_out_of_bounds(sap_impact): + return (sap_impact < ventilation_sap_limit) or (sap_impact >= 0) + + def _adjust_ventilation_sap(sap_impact): + if sap_impact >= 0: + return -1 + if sap_impact < 
ventilation_sap_limit: + return ventilation_sap_limit + + ventilation_out_of_bounds = _check_veniltation_out_of_bounds(property_phase_impact["sap"]) + + if ventilation_out_of_bounds: + previous_modelled_sap = previous_phase_values.get("sap_prediction", 0) + proposed_sap_impact = current_phase_sap - previous_modelled_sap + proposal_out_of_bounds = _check_veniltation_out_of_bounds(proposed_sap_impact) + if proposal_out_of_bounds: + property_phase_impact["sap"] = _adjust_ventilation_sap(proposed_sap_impact) + else: + property_phase_impact["sap"] = proposed_sap_impact + + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] + if rec["type"] == "loft_insulation": # When we have a loft insulation recommendation, where there is an extension and the existing # amount of loft insulation is already good, we limit the SAP points @@ -642,6 +674,8 @@ class Recommendations: ) if li_sap_limit is not None: property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit) + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] if rec["type"] == "solar_pv": # We use the SAP points in the recommendation as a minimum @@ -649,6 +683,8 @@ class Recommendations: rec["sap_points"] if property_phase_impact["sap"] < rec["sap_points"] else property_phase_impact["sap"] ) + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] # Insert this information into the recommendation. 
if not rec.get("survey", False): @@ -669,7 +705,8 @@ class Recommendations: "representative": rec["recommendation_id"] in representative_ids, "recommendation_id": rec["recommendation_id"], "measure_type": rec["measure_type"], - **current_phase_values + **current_phase_values, + "sap_prediction": phase_energy_efficiency_metrics["sap_change"] } ) From 90c5f12671091086e96042e03142b36938ef8b20 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 1 Jan 2026 11:09:04 +0800 Subject: [PATCH 142/202] handling large floor area --- recommendations/HeatingRecommender.py | 77 +++++-- .../optimiser/funding_optimiser.py | 213 ++++++++++++++++++ .../tests/test_optimiser_functions.py | 1 + recommendations/tests/test_optimisers.py | 96 +------- 4 files changed, 289 insertions(+), 98 deletions(-) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 409f9ec6..b1f6205c 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -555,7 +555,9 @@ class HeatingRecommender: for kw in models_kw: if kw >= target: return kw - return None + + # Return the largest + return max(models_kw) def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False): """ @@ -586,7 +588,15 @@ class HeatingRecommender: ) ashp_size = self.pick_model(estimated_load) - ashp_costs = self.costs.air_source_heat_pump(ashp_size) + number_heated_rooms = self._estimate_n_heated_rooms() + # We now adjust this depending on the floor area to get number of communcal rooms (e.g. 
hallways) + communal_heated_rooms = self._estimate_n_communal_heated_rooms() + + ashp_costs = self.costs.air_source_heat_pump( + ashp_size, + number_heated_rooms=number_heated_rooms + communal_heated_rooms, + total_floor_area=self.property.floor_area + ) if non_intrusive_recommendation: # Update with non-intrusive recommendation if non_intrusive_recommendation.get("cost"): @@ -907,6 +917,56 @@ class HeatingRecommender: return already_has_hhr and already_has_hhr_contols + def _estimate_n_heated_rooms(self): + # If the property is off-gas and has no heating system in place, the number of heated rooms will actually + # be 0, so we use the number of rooms as the figure + number_heated_rooms = ( + self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0 + else ( + self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else + self.property.number_of_rooms + ) + ) + # To be conservative, we adjust if we still have 1 room + if (number_heated_rooms == 1) and (self.property.number_of_rooms > 2): + number_heated_rooms = self.property.number_of_rooms - 1 + + return number_heated_rooms + + def _estimate_n_communal_heated_rooms(self) -> int: + """ + Estimate number of communal circulation rooms (hallways / landings) that may reasonably contain a heater + """ + + # Base assumptions + base_by_type = { + "Flat": 1, + "Maisonette": 1, + "Bungalow": 1, + "House": 2, + } + + # Fallback if property type unknown + base = base_by_type.get(self.property.data["property-type"], 1) + + # Area-based adjustments + if self.property.data["property-type"] in ("Flat", "Maisonette"): + if self.property.floor_area > 90: + return base + 1 # duplex or very large flat + return base + + if self.property.data["property-type"] == "Bungalow": + if self.property.floor_area > 100: + return base + 1 # secondary corridor + return base + + if self.property.data["property-type"] == "House": + if self.property.floor_area > 140: + return base + 1 # extra landing / 
circulation + return base + + return base + def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only, _return=False): """ We will recommend upgrading to a high heat retention storage system, if the current system is not already @@ -1010,18 +1070,7 @@ class HeatingRecommender: else: heating_simulation_config["hot_water_energy_eff_ending"] = self.property.data["hot-water-energy-eff"] - # If the property is off-gas and has no heating system in place, the number of heated rooms will actually - # be 0, so we use the number of rooms as the figure - number_heated_rooms = ( - self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0 - else ( - self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else - self.property.number_of_rooms - ) - ) - # To be conservative, we adjust if we still have 1 room - if (number_heated_rooms == 1) and (self.property.number_of_rooms > 2): - number_heated_rooms = self.property.number_of_rooms - 1 + number_heated_rooms = self._estimate_n_heated_rooms() # We focus on the 700 watt product hhrsh_product = next((x for x in self.hhrsh_products if x["size"] == 700), {}) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index a8b998ae..dd7184d0 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -10,6 +10,7 @@ In the future, we will adapt this into a class-based structure to allow for more from copy import deepcopy import pandas as pd import numpy as np +from itertools import product from backend.app.plan.schemas import ( WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES @@ -587,6 +588,218 @@ def optimise_with_funding_paths( return solutions +def build_heat_pump_paths( + remaining_wall_measures, + remaining_roof_measures, +): + """ + Build AND-paths using cartesian products. 
+ + Rules: + - Always include air_source_heat_pump + - Choose 1 wall measure if any exist + - Choose 1 roof measure if any exist + """ + + # If a category is empty, use [None] so product still works + wall_choices = remaining_wall_measures or [None] + roof_choices = remaining_roof_measures or [None] + + paths = [] + + for wall, roof in product(wall_choices, roof_choices): + parts = [] + + if wall is not None: + parts.append(wall) + if roof is not None: + parts.append(roof) + + parts.append("air_source_heat_pump") + + paths.append({"AND": parts}) + + return paths + + +def exclude_measure_types(input_measures, excluded_types): + excluded = set(excluded_types) + filtered = [] + + for group in input_measures: + kept = [ + opt for opt in group + if opt["type"] not in excluded + ] + if kept: + filtered.append(kept) + + return filtered + + +def optimise_with_scenarios( + input_measures, + budget=None, + target_gain=None, + enforce_heat_pump_insulation=True, + enforce_fabric_first=False +): + """ + Scenario-based optimiser (funding-agnostic). + + Currently implemented scenarios: + 1) With air source heat pump AND required insulation + """ + + solutions = [] + paths = [] + # Produce the unique list of measure types + all_measure_types = [] + for inputs in input_measures: + all_measure_types.extend([x["type"] for x in inputs]) + all_measure_types = list(set(all_measure_types)) + + if enforce_fabric_first: + # If this is true, it means we only want to consider a fabric first approach. 
This means that + # - We treat the fabric of the house first + # - Only once the fabric has been upgraded, do we consider heating upgrades + + # This should be wall insulation, roof insulation, floor insulation and windows + fabric_measures = WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES + + fabric_only_measures = [[opt for opt in group if opt["type"] in fabric_measures] for group in input_measures] + fabric_only_measures = [g for g in fabric_only_measures if g] + + if not fabric_only_measures: + # If we have no fabric measures, it means the work has already been done and we can proceed + # straight to heating optimisation + picked_fabric, fabric_cost, fabric_gain = [], 0, 0 + else: + picked_fabric, fabric_cost, fabric_gain = run_optimizer( + input_measures=fabric_only_measures, + budget=budget, + sub_target_gain=target_gain, + # If we can achieve the target gain with just insulation measures, we're done + ) + + picked_fabric_types = {m["type"] for m in picked_fabric} + + remaining_measures = [] + for group in input_measures: + kept = [m for m in group if m["type"] not in picked_fabric_types] + if kept: + remaining_measures.append(kept) + + picked_extra, extra_cost, extra_gain = run_optimizer( + remaining_measures, + budget=budget - fabric_cost if budget is not None else None, + sub_target_gain=( + target_gain - fabric_gain + if target_gain is not None + else None + ) + ) + + if picked_extra is None: + picked_extra, extra_cost, extra_gain = [], 0, 0 + + solutions.append({ + "scenario": "fabric_first", + "items": picked_fabric + picked_extra, + "fixed_items": picked_fabric, + "total_cost": fabric_cost + extra_cost, + "total_gain": fabric_gain + extra_gain, + }) + return solutions + + # ------------------------------------------------------------------ + # Scenario 1: Air source heat pump with required insulation + # ------------------------------------------------------------------ + if enforce_heat_pump_insulation: + # Wall 
measures could be IWI or EWI + remaining_wall_measures = [x for x in all_measure_types if x in WALL_INSULATION_MEASURES] + remaining_roof_measures = [x for x in all_measure_types if x in ROOF_INSULATION_MEASURES] + + # Mandatory structure: + # - must include ASHP + # - must include >=1 wall insulation (if still needed) + # - must include >=1 roof insulation (if still needed) + # We need all of the combinations of remaining wall and remaining roof measures + heat_pump_paths = build_heat_pump_paths(remaining_wall_measures, remaining_roof_measures) + paths.extend(heat_pump_paths) + + # ------------------------------------------------------------------ + # Scenario 2: Optimise without air source heat pump + # ------------------------------------------------------------------ + # No special path; just exclude ASHP from options and allow us to optimise. + measures_no_heat_pump = exclude_measure_types(input_measures, ["air_source_heat_pump"]) + + picked, total_cost, total_gain = run_optimizer( + measures_no_heat_pump, + budget=budget, + sub_target_gain=target_gain, + ) + + if picked is not None: + solutions.append({ + "scenario": "no_heat_pump", + "items": picked, + "fixed_items": [], + "total_cost": total_cost, + "total_gain": total_gain, + }) + + fixed_selections = expand_funding_path(input_measures, paths) + + for fixed in fixed_selections: + + # fixed = [(gi, oi, opt), ...] 
+ fixed_items = [opt for (_, _, opt) in fixed] + fixed_groups = {gi for (gi, _, _) in fixed} + + fixed_cost, fixed_gain = sum_cost_gain(fixed_items) + + # Remaining measures (all other groups) + remaining_measures = [ + grp for gi, grp in enumerate(input_measures) + if gi not in fixed_groups + ] + + # Optimise remaining measures + if ( + target_gain is not None + and fixed_gain >= target_gain + ): + picked, sub_cost, sub_gain = [], 0, 0 + else: + picked, sub_cost, sub_gain = run_optimizer( + remaining_measures, + budget=budget - fixed_cost if budget is not None else None, + sub_target_gain=( + target_gain - fixed_gain + if target_gain is not None + else None + ) + ) + + if picked is None: + continue + + total_items = fixed_items + picked + total_cost = fixed_cost + sub_cost + total_gain = fixed_gain + sub_gain + + solutions.append({ + "scenario": "heat_pump_with_insulation", + "items": total_items, + "fixed_items": fixed_items, + "total_cost": total_cost, + "total_gain": total_gain, + }) + + return solutions + + # ---- helpers ------------------------------------------------------------- diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 031bb9ac..865e3398 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -8,6 +8,7 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser class TestPrepareInputMeasures: + def test_returns_expected_structure_without_ventilation(self): recs = [ [ # loft insulation measure diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index e81aac69..ecc6ea56 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -1,97 +1,14 @@ -import numpy as np -# import pandas as pd from pandas import Timestamp from numpy import nan import datetime -# import backend.app.assumptions as assumptions -# import 
recommendations.optimiser.optimiser_functions as optimiser_functions -# -# from backend.Funding import Funding -# -# project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") -# partial_project_scores_matrix = pd.read_csv("backend/tests/test_data/ECO4_Partial_Project_Scores_Matrix_v6.csv") -# partial_project_scores_matrix.columns = ['Measure category', 'Measure_Type', 'Pre_Main_Heating_Source', -# 'Post_Main_Heating_Source', 'Total Floor Area Band', 'Starting Band', -# 'Average Treatable Factor', 'Cost Savings', 'SAP Savings'] -# whlg_eligible_postcodes = pd.DataFrame([{"Postcode": "ab12cd"}]) -# -# funding = Funding( -# project_scores_matrix=project_scores_matrix, -# partial_project_scores_matrix=partial_project_scores_matrix, -# whlg_eligible_postcodes=whlg_eligible_postcodes, -# eco4_social_cavity_abs_rate=13.5, -# eco4_social_solid_abs_rate=17, -# eco4_private_cavity_abs_rate=13.5, -# eco4_private_solid_abs_rate=17, -# gbis_social_cavity_abs_rate=21, -# gbis_social_solid_abs_rate=25, -# gbis_private_cavity_abs_rate=22, -# gbis_private_solid_abs_rate=28, -# tenure="Social" -# ) -# -# # Assume these costs have been adjusted - - -# -# # Insert the funding uplifts -# for recs in property_recommendations: -# for r in recs: -# # Insert randomly -# # Select one of 0, 0.25 or 0.45 -# r["uplift"] = np.random.choice([0, 0.25, 0.45]) -# -# # We calculate the innovation uplift against each measure -# for recs in property_recommendations: -# for r in recs: -# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating"]: -# r["innovation_uplift"] = 0 -# continue -# r["innovation_uplift"] = funding.get_innovation_uplift( -# measure=r, -# starting_sap=p.data["current-energy-efficiency"], -# floor_area=p.floor_area, -# is_cavity=False, -# current_wall_uvalue=1.7, -# is_partial=False, -# existing_li_thickness=150, -# mainheating=p.main_heating, -# main_fuel=p.main_fuel, -# 
mainheat_energy_eff=p.data["mainheat-energy-eff"], -# ) -# print(r["innovation_uplift"]) -# -# property_measure_types = {rec["type"] for recs in property_recommendations for rec in recs} -# property_required_measures = [m for m in property_recommendations if m[0]["type"] in []] -# measures_to_optimise = [m for m in property_recommendations if m[0]["type"] not in []] -# -# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore -# # its inclusion -# needs_ventilation = any( -# x in property_measure_types for x in assumptions.measures_needing_ventilation -# ) and not p.has_ventilation -# -# input_measures = optimiser_functions.prepare_input_measures( -# measures_to_optimise, "Increasing EPC", needs_ventilation, True -# ) -# -# # ---- main wrapper around your optimiser ---------------------------------- -# -# # Run inputs: -# target_gain = 18.5 -# -# # Run the optimiser with these inouts - - -# tests/test_social_fabric_only.py import numpy as np import pandas as pd import pytest from copy import deepcopy from recommendations.optimiser import optimiser_functions -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths # wherever you defined it +from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths, build_heat_pump_paths from backend.Funding import Funding from backend.app.plan.schemas import WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES @@ -799,3 +716,14 @@ def test_private_solid_wall_no_innovation_epc_d(p, funding, mock_project_scores_ 'partial_project_funding': 2300.1000000000004, 'partial_project_score': 135.3, 'total_uplift': 0.0, 'total_uplift_score': 0.0 } + + +def test_build_heat_pump_paths(): + eg1 = build_heat_pump_paths([], ["loft_insulation"]) + + assert eg1 == [{'AND': ['loft_insulation', 'air_source_heat_pump']}] + + eg2 = build_heat_pump_paths(["internal_wall_insulation", "external_wall_insulation"], 
["loft_insulation"]) + + assert eg2 == [{'AND': ['internal_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, + {'AND': ['external_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}] From 6b46542d356a237eb7f72dda3eda35a0a22274d1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 1 Jan 2026 12:58:14 +0800 Subject: [PATCH 143/202] removing funding --- backend/engine/engine.py | 169 ++---------------- .../optimiser/funding_optimiser.py | 126 ++++++++++--- 2 files changed, 112 insertions(+), 183 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 58203da7..6ec96aba 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -45,7 +45,7 @@ from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths +from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths, optimise_with_scenarios from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value from utils.logger import setup_logger @@ -1069,21 +1069,6 @@ async def model_engine(body: PlanTriggerRequest): ) gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) - funding = Funding( - tenure=body.housing_type, - project_scores_matrix=project_scores_matrix, - partial_project_scores_matrix=partial_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=13, - eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=13, - eco4_private_solid_abs_rate=17, - gbis_social_cavity_abs_rate=21, - gbis_social_solid_abs_rate=25, - gbis_private_cavity_abs_rate=21, - gbis_private_solid_abs_rate=28, - ) - li_thickness = convert_thickness_to_numeric( p.roof["insulation_thickness"], p.roof["is_pitched"], 
p.roof["is_flat"] ) @@ -1102,41 +1087,8 @@ async def model_engine(body: PlanTriggerRequest): # TODO: Turn this into a function and store the innovaiton uplift for group in measures_to_optimise_with_uplift: for r in group: - - if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", - "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: - ( - r["partial_project_score"], - r["partial_project_funding"], - r["innovation_uplift"], - r["uplift_project_score"], - ) = ( - 0, 0, 0, 0 - ) - continue - - ( - r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"] - ) = funding.get_innovation_uplift( - measure=r, - starting_sap=int(p.data["current-energy-efficiency"]), - floor_area=p.floor_area, - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - ) - - if r["already_installed"]: - # if already installed, we zero out the uplift and funding - (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"]) = ( - 0, 0, 0, 0 - ) + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = (0, 0, 0, 0) input_measures = optimiser_functions.prepare_input_measures( measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True, @@ -1146,62 +1098,36 @@ async def model_engine(body: PlanTriggerRequest): # When the goal is Increasing EPC, we can run the funding optimiser if body.goal == "Increasing EPC": - solutions = optimise_with_funding_paths( + solutions = optimise_with_scenarios( p=p, input_measures=input_measures, - housing_type=body.housing_type, budget=body.budget, target_gain=gain, - funding=funding, - 
work_package=eco_packages[p.id][2] + enforce_heat_pump_insulation=True, + enforce_fabric_first=False ) # if handle the empty case if solutions.empty: - scheme = "none" - funded_measures, solution = [], [] - ( - project_funding, total_uplift, full_project_score, partial_project_score, uplift_project_score, - battery_sap_score - ) = 0, 0, 0, 0, 0, 0 + solution, battery_sap_score = [], 0 else: - solutions = solutions[ - (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") - ] if solutions["meets_upgrade_target"].any(): # If we have a solution that meets the upgrade target, we select that one optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] else: - # Pick the cheapest + # We re-organise, taking the solution with the most gain and then the cheapest + solutions = solutions.sort_values( + by=["total_gain", "total_cost"], ascending=[False, True] + ) optimal_solution = solutions.iloc[0] - # This is the list of measures that we will recommend - scheme = optimal_solution["scheme"] - # We create this full list of selected measures, which is used in the next section for setting # default measures - solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) - funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] - - # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) - project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ - optimal_solution["partial_project_funding"] - # This is the total amount of funding associated to the uplift (£) - total_uplift = optimal_solution["total_uplift"] - # This is the funding scheme selected - # This is the full project ABS - full_project_score = optimal_solution["project_score"] - # This is the partial project ABS - partial_project_score = optimal_solution["partial_project_score"] - # This is the uplift score ABS - uplift_project_score = 
optimal_solution["total_uplift_score"] - # This is the SAP score associated to a battery - pv_size = next( - (m["array_size"] for m in optimal_solution["items"] if m["type"] == "solar_pv"), 0 - ) + solution = deepcopy(optimal_solution["items"]) + pv_size = float(optimal_solution["array_size"]) battery_sap_score = BatterySAPScorer.score( - starting_sap=optimal_solution["ending_sap"], pv_size=pv_size + starting_sap=optimal_solution["ending_sap_without_battery"], pv_size=pv_size ) else: # We optimise and then we determine eligibility for funding, based on the measures selected @@ -1216,52 +1142,6 @@ async def model_engine(body: PlanTriggerRequest): gain = optimiser.solution_gain post_sap = int(p.data["current-energy-efficiency"]) + gain - recommendation_types = [] - for measures in input_measures: - for measure in measures: - recommendation_types.append(measure["type"]) - recommendation_types = set(recommendation_types) - - has_wall_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - WALL_INSULATION_MEASURES - ) - has_roof_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - ROOF_INSULATION_MEASURES - ) - - funding.check_funding( - measures=solution, - starting_sap=int(p.data["current-energy-efficiency"]), - ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]), - floor_area=p.floor_area, - mainheat_description=p.main_heating["clean_description"], - heating_control_description=p.main_heating_controls["clean_description"], - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - has_wall_insulation_recommendation=has_wall_insulation_recommendation, - 
has_roof_insulation_recommendation=has_roof_insulation_recommendation, - ) - - # Determine the scheme - scheme = "none" - if funding.eco4_eligible: - scheme = "eco4" - if scheme == "none" and funding.gbis_eligible: - scheme = "gbis" - - funded_measures = solution if scheme in ["gbis", "eco4"] else [] - project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs - total_uplift = funding.eco4_uplift - full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs - partial_project_score = funding.partial_project_abs - uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift pv_size = next( (m["array_size"] for m in solution if m["type"] == "solar_pv"), 0 ) @@ -1282,21 +1162,6 @@ async def model_engine(body: PlanTriggerRequest): p.id, recommendations, selected, battery_sap_score ) - # TODO: functionise - for measure in funded_measures: - if "+mechanical_ventilation" in measure["type"]: - measure["type"] = measure["type"].split("+mechanical_ventilation")[0] - - p.insert_funding( - scheme=scheme, - funded_measures=funded_measures, - project_funding=project_funding, - total_uplift=total_uplift, - full_project_score=full_project_score, - partial_project_score=partial_project_score, - uplift_project_score=uplift_project_score - ) - # when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we apply it to all # of them # TODO: We can probably do better and optimise at the building level - this is temp @@ -1470,12 +1335,6 @@ async def model_engine(body: PlanTriggerRequest): session, recommendation_payload ) - funding_payload = [ - {"plan_id": plan_id_by_property[f["property_id"]], **{k: v for k, v in f.items() if k != "property_id"}} - for f in funding_to_create if f["property_id"] in plan_id_by_property - ] - db_funcs.funding_functions.bulk_upload_funding_packages(session, funding_payload) - logger.info("Work completed, updating log status") except 
IntegrityError as e: diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index dd7184d0..083b5e99 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -638,6 +638,7 @@ def exclude_measure_types(input_measures, excluded_types): def optimise_with_scenarios( + p, input_measures, budget=None, target_gain=None, @@ -659,6 +660,21 @@ def optimise_with_scenarios( all_measure_types.extend([x["type"] for x in inputs]) all_measure_types = list(set(all_measure_types)) + # We modify the solar PV gain, if there is a battery, to include an estimated SAP battery uplift, should + # the property hit the upgrade target, plus 1. We add the additional 1 because the higher the starting SAP, + # the lower the battery SAP uplift, so this is a conservative approach since the true SAP score is + # re-calculated later on. + optimisation_measures = deepcopy(input_measures) + for measures in optimisation_measures: + if measures[0]["type"] == "solar_pv": + for x in measures: + if x["has_battery"]: + x["battery_gain"] = BatterySAPScorer.score( + starting_sap=int(p.data["current-energy-efficiency"]) + target_gain + 1, + pv_size=x["array_size"] + ) + x["gain"] += x["battery_gain"] + if enforce_fabric_first: # If this is true, it means we only want to consider a fabric first approach. 
This means that # - We treat the fabric of the house first @@ -667,7 +683,9 @@ def optimise_with_scenarios( # This should be wall insulation, roof insulation, floor insulation and windows fabric_measures = WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES - fabric_only_measures = [[opt for opt in group if opt["type"] in fabric_measures] for group in input_measures] + fabric_only_measures = [ + [opt for opt in group if opt["type"] in fabric_measures] for group in optimisation_measures + ] fabric_only_measures = [g for g in fabric_only_measures if g] if not fabric_only_measures: @@ -685,7 +703,7 @@ def optimise_with_scenarios( picked_fabric_types = {m["type"] for m in picked_fabric} remaining_measures = [] - for group in input_measures: + for group in optimisation_measures: kept = [m for m in group if m["type"] not in picked_fabric_types] if kept: remaining_measures.append(kept) @@ -709,15 +727,21 @@ def optimise_with_scenarios( "fixed_items": picked_fabric, "total_cost": fabric_cost + extra_cost, "total_gain": fabric_gain + extra_gain, + "already_installed_gain": sum([x["gain"] for x in picked_fabric + picked_extra if x["already_installed"]]) }) - return solutions + + return append_solution_metrics(solutions, target_gain, p) # ------------------------------------------------------------------ # Scenario 1: Air source heat pump with required insulation # ------------------------------------------------------------------ if enforce_heat_pump_insulation: # Wall measures could be IWI or EWI - remaining_wall_measures = [x for x in all_measure_types if x in WALL_INSULATION_MEASURES] + remaining_wall_measures = [ + x for x in all_measure_types if x in WALL_INSULATION_MEASURES + [ + "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation" + ] + ] remaining_roof_measures = [x for x in all_measure_types if x in ROOF_INSULATION_MEASURES] # Mandatory structure: @@ -728,28 +752,7 @@ def 
optimise_with_scenarios( heat_pump_paths = build_heat_pump_paths(remaining_wall_measures, remaining_roof_measures) paths.extend(heat_pump_paths) - # ------------------------------------------------------------------ - # Scenario 2: Optimise without air source heat pump - # ------------------------------------------------------------------ - # No special path; just exclude ASHP from options and allow us to optimise. - measures_no_heat_pump = exclude_measure_types(input_measures, ["air_source_heat_pump"]) - - picked, total_cost, total_gain = run_optimizer( - measures_no_heat_pump, - budget=budget, - sub_target_gain=target_gain, - ) - - if picked is not None: - solutions.append({ - "scenario": "no_heat_pump", - "items": picked, - "fixed_items": [], - "total_cost": total_cost, - "total_gain": total_gain, - }) - - fixed_selections = expand_funding_path(input_measures, paths) + fixed_selections = expand_funding_path(optimisation_measures, paths) for fixed in fixed_selections: @@ -761,7 +764,7 @@ def optimise_with_scenarios( # Remaining measures (all other groups) remaining_measures = [ - grp for gi, grp in enumerate(input_measures) + grp for gi, grp in enumerate(optimisation_measures) if gi not in fixed_groups ] @@ -795,9 +798,76 @@ def optimise_with_scenarios( "fixed_items": fixed_items, "total_cost": total_cost, "total_gain": total_gain, + "already_installed_gain": sum([x["gain"] for x in total_items if x["already_installed"]]) }) - return solutions + # ------------------------------------------------------------------ + # Scenario 2: Optimise without air source heat pump + # ------------------------------------------------------------------ + # No special path; just exclude ASHP from options and allow us to optimise. 
+ measures_no_heat_pump = exclude_measure_types(optimisation_measures, ["air_source_heat_pump"]) + + picked, total_cost, total_gain = run_optimizer( + measures_no_heat_pump, + budget=budget, + sub_target_gain=target_gain, + ) + + if picked is not None: + solutions.append({ + "scenario": "no_heat_pump", + "items": picked, + "fixed_items": [], + "total_cost": total_cost, + "total_gain": total_gain, + "already_installed_gain": sum([x["gain"] for x in picked if x["already_installed"]]) + }) + + solutions_df = append_solution_metrics(solutions, target_gain, p) + + return solutions_df + + +def _get_ending_sap_without_battery(x): + gain = [y["gain"] - y.get("battery_gain", 0) for y in x["items"]] + return float(sum(gain)) + + +def append_solution_metrics(solutions, target_gain, p): + """ + Given a set of solutions, this function will return a dataframe, with cost metrics appended, to allow + the end user to select the optimal solution. + :param solutions: + :param target_gain: + :return: + """ + + solutions_df = pd.DataFrame(solutions) + + if solutions_df.empty: + # We return a blank dataframe + return solutions_df + + # Given the scheme, we now check if the packages are eligible. If they *are* eligible, but they don't meet the + # final upgrade target, we then look to perform a final optimisation pass to meet the target gain. 
+ solutions_df["meets_upgrade_target"] = solutions_df["total_gain"] >= target_gain - 0.1 + # We now can calculate the project ABS, which subtracts from the cost, but this is only relevant for ECO4 + # We flag projects that are including batteries + solutions_df["has_battery"] = solutions_df["items"].apply(has_battery) + solutions_df["array_size"] = solutions_df["items"].apply( + lambda x: sum(float(y["array_size"]) for y in x if "array_size" in y) + ) + + # We need the ending SAP, but we'll need to remove the battery SAP uplift first + + solutions_df["ending_sap_without_battery"] = solutions_df.apply( + lambda x: int(p.data["current-energy-efficiency"]) + _get_ending_sap_without_battery(x), + axis=1 + ) + + solutions_df = solutions_df.sort_values("total_cost", ascending=True) + + return solutions_df # ---- helpers ------------------------------------------------------------- From d009a7adc1494761022979f148d45183a5dbc0fd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 1 Jan 2026 13:17:13 +0800 Subject: [PATCH 144/202] removing redundant code --- backend/engine/engine.py | 48 +++------------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 6ec96aba..14087f83 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -889,7 +889,7 @@ async def model_engine(body: PlanTriggerRequest): with db_read_session() as session: materials = db_funcs.materials_functions.get_materials(session) cleaned = get_cleaned() - project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() + # project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True) @@ -1042,6 +1042,7 @@ async def model_engine(body: PlanTriggerRequest): property_instance.current_energy_bill = property_current_energy_bill # Insert the 
predictions into the recommendations and run the optimiser + logger.info("Optimising measures") for p in input_properties: if not recommendations.get(p.id): continue @@ -1069,18 +1070,6 @@ async def model_engine(body: PlanTriggerRequest): ) gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) - li_thickness = convert_thickness_to_numeric( - p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] - ) - current_wall_u_value = p.walls["thermal_transmittance"] - if current_wall_u_value is None: - current_wall_u_value = get_wall_u_value( - clean_description=p.walls["clean_description"], - age_band=p.age_band, - is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], - is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], - ) - # We insert the innovation uplift measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) @@ -1217,7 +1206,7 @@ async def model_engine(body: PlanTriggerRequest): ) property_updates, property_epc_details, property_spatial_updates = [], [], [] - plans_to_create, recommendations_to_create, funding_to_create = [], [], [] + plans_to_create, recommendations_to_create = [], [] # Prepare the data that will need to be uploaded in bulk for p in input_properties: @@ -1284,37 +1273,6 @@ async def model_engine(body: PlanTriggerRequest): ], }) - if recommendations_for_property and p.funded_measures: - funding_to_create.append({ - "property_id": p.id, - "scheme": p.scheme, - "project_funding": float(p.project_funding), - "total_uplift": float(p.total_uplift), - "full_project_score": float(p.full_project_score), - "partial_project_score": float(p.partial_project_score), - "uplift_project_score": float(p.uplift_project_score), - "measures": [ - { - "measure": ( - "cavity_wall_insulation" - if part["type"] == "extension_cavity_wall_insulation" - else "sealing_fireplace" - if part["type"] == "sealing_open_fireplace" - else part["type"] - ), - "material_id": ( - 
part["parts"][0]["id"] - if part.get("parts") - else None - ), - "innovation_uplift": float(part["innovation_uplift"]), - "partial_project_score": float(part["partial_project_score"]), - "uplift_project_score": float(part["uplift_project_score"]), - } - for part in p.funded_measures - ], - }) - # Bulk upload property data logger.info("Uploading property data in bulk") with db_session() as session: From 9ac3eeb8a7a5d406f0f16cb508bdb78777980ddf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 1 Jan 2026 13:19:11 +0800 Subject: [PATCH 145/202] reduced concurrency to 5 lambdas instead of 10 --- serverless.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serverless.yml b/serverless.yml index 38d8da89..d2d8f50a 100644 --- a/serverless.yml +++ b/serverless.yml @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 10 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 5 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: From 860645e30eff124866f663d47b99eb2e2792677f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 1 Jan 2026 14:02:09 +0800 Subject: [PATCH 146/202] handling db bug for trigger api --- backend/app/db/connection.py | 23 ++ .../db/functions/recommendations_functions.py | 384 ++++++------------ backend/app/plan/router.py | 3 +- backend/engine/engine.py | 35 +- 4 files changed, 143 insertions(+), 302 deletions(-) diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index a0bbe238..74f3bd2e 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -1,4 +1,5 @@ from sqlalchemy import create_engine +from contextlib import contextmanager from backend.app.config import get_settings from sqlmodel import Session @@ -29,3 +30,25 @@ def get_db_session(): if db_engine is None: raise RuntimeError("Database is not 
configured. Set DATABASE_URL in environment variables.") return Session(db_engine) + + +@contextmanager +def db_session(): + session = Session(db_engine) + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + +@contextmanager +def db_read_session(): + session = Session(db_engine, expire_on_commit=False) + try: + yield session + finally: + session.close() diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 5b39f86e..1ffb35d6 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -9,6 +9,7 @@ from backend.app.db.models.portfolio import ( ) from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage from backend.app.db.models.inspections import InspectionModel +from backend.app.db.connection import db_session, db_read_session def prepare_plan_data( @@ -350,298 +351,143 @@ def chunked(iterable, size=100): yield iterable[i:i + size] -# def fast_delete_recommendations(session, chunk): -# placeholders = ",".join(["(:p{})".format(i) for i in range(len(chunk))]) -# params = {f"p{i}": chunk[i] for i in range(len(chunk))} -# -# sql = text(f""" -# WITH ids(property_id) AS ( -# VALUES {placeholders} -# ) -# DELETE FROM recommendation r -# USING ids -# WHERE r.property_id = ids.property_id; -# """) -# -# session.execute(sql, params, execution_options={"synchronize_session": False}) +def get_property_ids(portfolio_id: int) -> list[int]: + with db_read_session() as session: + return [ + pid for (pid,) in + session.query(PropertyModel.id) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ] + + +def delete_property_batch(session: Session, property_ids: list[int]): + if not property_ids: + return + + # -------------------------------------------------- + # Shared subqueries (computed once) + # 
-------------------------------------------------- + plan_ids = ( + select(Plan.id) + .where(Plan.property_id.in_(property_ids)) + ) + + recommendation_ids = ( + select(Recommendation.id) + .where(Recommendation.property_id.in_(property_ids)) + ) + + funding_package_ids = ( + select(FundingPackage.id) + .where(FundingPackage.plan_id.in_(plan_ids)) + ) + + # -------------------------------------------------- + # Leaf tables FIRST + # -------------------------------------------------- + session.execute( + delete(RecommendationMaterials) + .where(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + ) + + session.execute( + delete(PlanRecommendations) + .where(PlanRecommendations.plan_id.in_(plan_ids)) + ) + + session.execute( + delete(FundingPackageMeasures) + .where(FundingPackageMeasures.funding_package_id.in_(funding_package_ids)) + ) + + session.execute( + delete(InspectionModel) + .where(InspectionModel.property_id.in_(property_ids)) + ) + + # -------------------------------------------------- + # Mid-level tables + # -------------------------------------------------- + session.execute( + delete(FundingPackage) + .where(FundingPackage.id.in_(funding_package_ids)) + ) -def fast_delete_recommendations(session, chunk): session.execute( delete(Recommendation) - .where(Recommendation.property_id.in_(chunk)) + .where(Recommendation.id.in_(recommendation_ids)) ) - -def clear_portfolio(session: Session, portfolio_id: int, batch_size=100): - def print_progress(prefix, i, total): - print(f"{prefix} ({i}/{total})") - - # -------------------------- - # Collect IDs up-front - # -------------------------- - property_ids = [ - p.id for p in session.query(PropertyModel.id) - .filter(PropertyModel.portfolio_id == portfolio_id) - ] - - recommendation_ids = [ - r.id for r in session.query(Recommendation.id) - .filter(Recommendation.property_id.in_(property_ids)) - ] - - plan_ids = [ - p.id for p in session.query(Plan.id) - .filter(Plan.portfolio_id == portfolio_id) 
- ] - - funding_package_ids = [ - fp.id for fp in session.query(FundingPackage.id) - .filter(FundingPackage.plan_id.in_(plan_ids)) - ] - - # ========== BATCH HELPERS ========== - def chunked(lst, n): - for i in range(0, len(lst), n): - yield lst[i:i + n] - - # -------------------------- - # Deleting RecommendationMaterials - # -------------------------- - rm_chunks = list(chunked(recommendation_ids, batch_size)) - total = len(rm_chunks) - for i, chunk in enumerate(rm_chunks, start=1): - print_progress("Deleting RecommendationMaterials", i, total) - session.execute( - delete(RecommendationMaterials) - .where(RecommendationMaterials.recommendation_id.in_(chunk)) - ) - - # -------------------------- - # PlanRecommendations - # -------------------------- - pr_chunks = list(chunked(plan_ids, batch_size)) - total = len(pr_chunks) - for i, chunk in enumerate(pr_chunks, start=1): - print_progress("Deleting PlanRecommendations", i, total) - session.execute( - delete(PlanRecommendations) - .where(PlanRecommendations.plan_id.in_(chunk)) - ) - - # -------------------------- - # FundingPackageMeasures - # -------------------------- - fpm_chunks = list(chunked(funding_package_ids, batch_size)) - total = len(fpm_chunks) - for i, chunk in enumerate(fpm_chunks, start=1): - print_progress("Deleting FundingPackageMeasures", i, total) - session.execute( - delete(FundingPackageMeasures) - .where(FundingPackageMeasures.funding_package_id.in_(chunk)) - ) - - # -------------------------- - # FundingPackages - # -------------------------- - fp_chunks = list(chunked(plan_ids, batch_size)) - total = len(fp_chunks) - for i, chunk in enumerate(fp_chunks, start=1): - print_progress("Deleting FundingPackages", i, total) - session.execute( - delete(FundingPackage) - .where(FundingPackage.plan_id.in_(chunk)) - ) - - # -------------------------- - # Plans - # -------------------------- - plan_chunks = list(chunked(plan_ids, batch_size)) - total = len(plan_chunks) - for i, chunk in 
enumerate(plan_chunks, start=1): - print_progress("Deleting Plans", i, total) - session.execute( - delete(Plan) - .where(Plan.id.in_(chunk)) - ) - - # -------------------------- - # Scenarios - # -------------------------- - print("Deleting Scenarios…") session.execute( - delete(Scenario) - .where(Scenario.portfolio_id == portfolio_id) + delete(Plan) + .where(Plan.id.in_(plan_ids)) ) - # -------------------------- - # Recommendations (fast delete) - # -------------------------- - # rec_chunks = list(chunked(property_ids, batch_size * 5)) # larger chunks for fast delete - # total = len(rec_chunks) - # for i, chunk in enumerate(rec_chunks, start=1): - # print_progress("Deleting Recommendations", i, total) - # fast_delete_recommendations(session, chunk) - rec_chunks = list(chunked(recommendation_ids, batch_size)) - total = len(rec_chunks) - for i, chunk in enumerate(rec_chunks, start=1): - print_progress("Deleting Recommendations", i, total) - session.execute( - delete(Recommendation) - .where(Recommendation.id.in_(chunk)) - ) - - # -------------------------- - # Inspections - # -------------------------- - insp_chunks = list(chunked(property_ids, batch_size)) - total = len(insp_chunks) - for i, chunk in enumerate(insp_chunks, start=1): - print_progress("Deleting Inspections", i, total) - session.execute( - delete(InspectionModel) - .where(InspectionModel.property_id.in_(chunk)) - ) - - # -------------------------- - # PropertyTargetsModel - # -------------------------- - print("Deleting PropertyTargetsModel…") - session.execute( - delete(PropertyTargetsModel) - .where(PropertyTargetsModel.portfolio_id == portfolio_id) - ) - - # -------------------------- - # PropertyDetailsEpcModel - # -------------------------- - print("Deleting PropertyDetailsEpcModel…") + # -------------------------------------------------- + # Property-scoped tables + # -------------------------------------------------- session.execute( delete(PropertyDetailsEpcModel) - 
.where(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .where(PropertyDetailsEpcModel.property_id.in_(property_ids)) ) - # -------------------------- - # Properties - # -------------------------- - prop_chunks = list(chunked(property_ids, batch_size)) - total = len(prop_chunks) - for i, chunk in enumerate(prop_chunks, start=1): - print_progress("Deleting Properties", i, total) + session.execute( + delete(PropertyTargetsModel) + .where(PropertyTargetsModel.property_id.in_(property_ids)) + ) + + # -------------------------------------------------- + # Properties LAST + # -------------------------------------------------- + session.execute( + delete(PropertyModel) + .where(PropertyModel.id.in_(property_ids)) + ) + + +def portfolio_has_properties(portfolio_id: int) -> bool: + with db_read_session() as session: + return session.query( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == portfolio_id) + .exists() + ).scalar() + + +def delete_portfolio_scenarios_if_empty(portfolio_id: int): + if portfolio_has_properties(portfolio_id): + print("Properties still exist — skipping scenario deletion") + return + + with db_session() as session: session.execute( - delete(PropertyModel) - .where(PropertyModel.id.in_(chunk)) + delete(Scenario) + .where(Scenario.portfolio_id == portfolio_id) ) - session.commit() - print("Portfolio cleared.") + print("Deleted scenarios for empty portfolio") def clear_portfolio_in_batches( - session: Session, portfolio_id: int, - property_batch_size: int = 10 + property_batch_size: int = 25, ): - # Fetch all property IDs once - property_ids = [ - pid for (pid,) in - session.query(PropertyModel.id) - .filter(PropertyModel.portfolio_id == portfolio_id) - .all() - ] + property_ids = get_property_ids(portfolio_id) - def delete_for_property_batch(prop_ids): - # ---------------------------- - # Recommendations → PlanRecommendations - # ---------------------------- - rec_subq = ( - select(Recommendation.id) - 
.where(Recommendation.property_id.in_(prop_ids)) - ) + if not property_ids: + print("No properties found.") + delete_portfolio_scenarios_if_empty(portfolio_id) + return - session.execute( - delete(PlanRecommendations) - .where(PlanRecommendations.recommendation_id.in_(rec_subq)) - ) + total = (len(property_ids) + property_batch_size - 1) // property_batch_size - session.execute( - delete(RecommendationMaterials) - .where(RecommendationMaterials.recommendation_id.in_(rec_subq)) - ) + for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1): + print(f"Deleting batch {i}/{total} ({len(batch)} properties)") + with db_session() as session: + delete_property_batch(session, batch) - session.execute( - delete(Recommendation) - .where(Recommendation.property_id.in_(prop_ids)) - ) - - # ---------------------------- - # Inspections - # ---------------------------- - session.execute( - delete(InspectionModel) - .where(InspectionModel.property_id.in_(prop_ids)) - ) - - # ---------------------------- - # Plans (scoped to these properties) - # ---------------------------- - plan_subq = ( - select(Plan.id) - .where(Plan.property_id.in_(prop_ids)) - ) - - session.execute( - delete(PlanRecommendations) - .where(PlanRecommendations.plan_id.in_(plan_subq)) - ) - - session.execute( - delete(FundingPackageMeasures) - .where( - FundingPackageMeasures.funding_package_id.in_( - select(FundingPackage.id) - .where(FundingPackage.plan_id.in_(plan_subq)) - ) - ) - ) - - session.execute( - delete(FundingPackage) - .where(FundingPackage.plan_id.in_(plan_subq)) - ) - - session.execute( - delete(Plan) - .where(Plan.id.in_(plan_subq)) - ) - - # ---------------------------- - # Property-scoped auxiliary tables - # ---------------------------- - session.execute( - delete(PropertyDetailsEpcModel) - .where(PropertyDetailsEpcModel.property_id.in_(prop_ids)) - ) - - session.execute( - delete(PropertyTargetsModel) - .where(PropertyTargetsModel.property_id.in_(prop_ids)) - ) - - # 
---------------------------- - # Properties (last) - # ---------------------------- - session.execute( - delete(PropertyModel) - .where(PropertyModel.id.in_(prop_ids)) - ) - - # -------- BATCH DELETE LOOP -------- - property_chunks = list(chunked(property_ids, property_batch_size)) - total_batches = len(property_chunks) - - for i, prop_ids in enumerate(property_chunks, start=1): - print(f"Deleting batch {i}/{total_batches} ({len(prop_ids)} properties)") - delete_for_property_batch(prop_ids) - session.commit() + # scenario deletion happens AFTER all properties are gone + delete_portfolio_scenarios_if_empty(portfolio_id) print("Portfolio cleared in batches.") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 5de6b74e..ea41162f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -73,7 +73,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): created_at = datetime.now().isoformat() with db_session() as session: # Create a new scenario - new_scenario = create_scenario( + scenario_id = create_scenario( session=session, scenario={ "name": body.scenario_name, @@ -91,7 +91,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): "multi_plan": body.multi_plan } ) - scenario_id = new_scenario.id # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 14087f83..eb933cc0 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -8,10 +8,7 @@ import pandas as pd import numpy as np from uuid import UUID -from backend.Funding import Funding from backend.SearchEpc import SearchEpc -from contextlib import contextmanager -from sqlmodel import Session from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError @@ -19,11 +16,11 @@ from starlette.responses import Response from backend.app.BatterySapScorer import BatterySAPScorer from backend.app.config import get_settings, 
get_prediction_buckets -from backend.app.db.connection import db_engine +from backend.app.db.connection import db_session, db_read_session import backend.app.db.functions as db_funcs from backend.app.db.functions.tasks.Tasks import SubTaskInterface -from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES +from backend.app.plan.schemas import PlanTriggerRequest from backend.app.plan.utils import ( get_cleaned, patch_epc, extract_property_request_data, parse_eco_packages, handle_error, build_cloudwatch_log_url ) @@ -31,6 +28,7 @@ from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi +from backend.ml_models.Valuation import PropertyValuation from backend.Property import Property from backend.apis.GoogleSolarApi import GoogleSolarApi from backend.addresses.Addresses import Addresses @@ -39,15 +37,12 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser import recommendations.optimiser.optimiser_functions as optimiser_functions from recommendations.Recommendations import Recommendations -from backend.ml_models.Valuation import PropertyValuation +from recommendations.optimiser.funding_optimiser import optimise_with_scenarios from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths, optimise_with_scenarios -from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value - from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 @@ -530,28 +525,6 @@ def extract_address_data(config, body): return uprn, address1, full_address -@contextmanager -def db_session(): - session = 
Session(db_engine) - try: - yield session - session.commit() - except Exception: - session.rollback() - raise - finally: - session.close() - - -@contextmanager -def db_read_session(): - session = Session(db_engine, expire_on_commit=False) - try: - yield session - finally: - session.close() - - async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) From 3fc1e69e50f5e0926a6f41ab0993108406010751 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 2 Jan 2026 17:39:48 +0800 Subject: [PATCH 147/202] debugging some failed runs --- .../db/functions/recommendations_functions.py | 158 +++++++++++------- .../epc_attributes/HotWaterAttributes.py | 1 + .../epc_attributes/WallAttributes.py | 2 + recommendations/WallRecommendations.py | 2 + 4 files changed, 105 insertions(+), 58 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 1ffb35d6..726e919c 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,3 +1,4 @@ +from sqlalchemy import text from sqlalchemy import insert, delete, select from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError @@ -365,84 +366,122 @@ def delete_property_batch(session: Session, property_ids: list[int]): if not property_ids: return - # -------------------------------------------------- - # Shared subqueries (computed once) - # -------------------------------------------------- - plan_ids = ( - select(Plan.id) - .where(Plan.property_id.in_(property_ids)) - ) + params = {"property_ids": property_ids} - recommendation_ids = ( - select(Recommendation.id) - .where(Recommendation.property_id.in_(property_ids)) - ) - - funding_package_ids = ( - select(FundingPackage.id) - .where(FundingPackage.plan_id.in_(plan_ids)) + # -------------------------------------------------- + # 
recommendation_materials (via recommendation) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING recommendation r + WHERE rm.recommendation_id = r.id + AND r.property_id = ANY(:property_ids) + """), + params, ) # -------------------------------------------------- - # Leaf tables FIRST + # plan_recommendations (via plan) # -------------------------------------------------- session.execute( - delete(RecommendationMaterials) - .where(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) - ) - - session.execute( - delete(PlanRecommendations) - .where(PlanRecommendations.plan_id.in_(plan_ids)) - ) - - session.execute( - delete(FundingPackageMeasures) - .where(FundingPackageMeasures.funding_package_id.in_(funding_package_ids)) - ) - - session.execute( - delete(InspectionModel) - .where(InspectionModel.property_id.in_(property_ids)) + text(""" + DELETE FROM plan_recommendations pr + USING plan p + WHERE pr.plan_id = p.id + AND p.property_id = ANY(:property_ids) + """), + params, ) # -------------------------------------------------- - # Mid-level tables + # funding_package_measures # -------------------------------------------------- session.execute( - delete(FundingPackage) - .where(FundingPackage.id.in_(funding_package_ids)) - ) - - session.execute( - delete(Recommendation) - .where(Recommendation.id.in_(recommendation_ids)) - ) - - session.execute( - delete(Plan) - .where(Plan.id.in_(plan_ids)) + text(""" + DELETE FROM funding_package_measures fpm + USING funding_package fp, plan p + WHERE fpm.funding_package_id = fp.id + AND fp.plan_id = p.id + AND p.property_id = ANY(:property_ids) + """), + params, ) # -------------------------------------------------- - # Property-scoped tables + # inspections (direct) # -------------------------------------------------- session.execute( - delete(PropertyDetailsEpcModel) - .where(PropertyDetailsEpcModel.property_id.in_(property_ids)) - 
) - - session.execute( - delete(PropertyTargetsModel) - .where(PropertyTargetsModel.property_id.in_(property_ids)) + text(""" + DELETE FROM inspections + WHERE property_id = ANY(:property_ids) + """), + params, ) # -------------------------------------------------- - # Properties LAST + # funding_package # -------------------------------------------------- session.execute( - delete(PropertyModel) - .where(PropertyModel.id.in_(property_ids)) + text(""" + DELETE FROM funding_package fp + USING plan p + WHERE fp.plan_id = p.id + AND p.property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # recommendation (direct — CRITICAL FIX) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM recommendation + WHERE property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # plan (direct) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # property-scoped tables + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM property_details_epc + WHERE property_id = ANY(:property_ids) + """), + params, + ) + + session.execute( + text(""" + DELETE FROM property_targets + WHERE property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # properties LAST + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM property + WHERE id = ANY(:property_ids) + """), + params, ) @@ -481,11 +520,14 @@ def clear_portfolio_in_batches( return total = (len(property_ids) + property_batch_size - 1) // property_batch_size - + import time for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1): print(f"Deleting batch 
{i}/{total} ({len(batch)} properties)") + start_time = time.time() with db_session() as session: delete_property_batch(session, batch) + finish_time = time.time() + print(f"Batch {i} deleted in {finish_time - start_time:.2f} seconds") # scenario deletion happens AFTER all properties are gone delete_portfolio_scenarios_if_empty(portfolio_id) diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py index 9966edea..53cd2f97 100644 --- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py +++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py @@ -100,6 +100,7 @@ class HotWaterAttributes(Definitions): WELSH_TEXT = { "ogçör brif system": "from main system", "o r brif system": "from main system", + "o’r brif system": "from main system", "ogçör brif system, adfer gwres nwyon ffliw": "from main system, flue gas heat recovery", "bwyler/cylchredydd nwy": "gas boiler/circulator", "ogçör brif system, dim thermostat ar y silindr": "from main system, no cylinder thermostat", diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index a390e0a5..0e211983 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -39,6 +39,8 @@ class WallAttributes(Definitions): "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, " "insulated (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated", + "Waliau ceudod, fel y’u hadeiladwyd, wedi’u hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated (" + "assumed)", "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, " "as built, no insulation (" "assumed)", diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 328f1ab8..49483d2f 100644 --- 
a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -72,6 +72,7 @@ class WallRecommendations(Definitions): 'Timber frame, as built, partial insulation': 'Timber frame, with external insulation', "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation", "Sandstone, as built, no insulation": "Sandstone, with external insulation", + "Sandstone, as built, partial insulation": "Sandstone, with external insulation", } # These are the ending descriptions we consider for walls with internal insulation @@ -88,6 +89,7 @@ class WallRecommendations(Definitions): 'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation', "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation", "Sandstone, as built, no insulation": "Sandstone, with internal insulation", + "Sandstone, as built, partial insulation": "Sandstone, with internal insulation", } def __init__( From ddd4aa479b4b79cc15721c9230a623bb92ecc60b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 2 Jan 2026 20:23:50 +0800 Subject: [PATCH 148/202] fixed bug with wall attributes cleaning --- backend/Property.py | 3 +++ etl/epc_clean/epc_attributes/WallAttributes.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/backend/Property.py b/backend/Property.py index 10af56cc..70a70307 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -614,6 +614,9 @@ class Property: # Handling edge case for walls fill_with = False if description == "walls-description" else None fill_dict = dict(zip(template.keys(), [fill_with] * len(template))) + if description == "walls-description": + fill_dict["thermal_transmittance_unit"] = None + fill_dict["insulation_thickness"] = "none" fill_dict.update( { diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index 0e211983..075dee96 100644 --- 
a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -148,6 +148,9 @@ class WallAttributes(Definitions): for key in self.DEFAULT_KEYS: result[key] = False + result["thermal_transmittance_unit"] = None + result["insulation_thickness"] = "none" + return result description = self.description.lower() From 8c7f5f8fb112c16222b8408b981dd04e0f2b6390 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 2 Jan 2026 20:44:34 +0800 Subject: [PATCH 149/202] fixed minor bugs --- backend/engine/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index eb933cc0..f9820204 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -730,7 +730,7 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = addr.property_type # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) + epc_searcher.find_property(skip_os=True, api_data=None, overwrite_sap05=True) epc_searcher.set_uprn_source(file_format=body.file_format) lookup_key = ( From e93ee337885df0b779c96293a8e7700428d9888d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 3 Jan 2026 15:28:46 +0800 Subject: [PATCH 150/202] Fixing bug in EPC estimation when we have a heating system and do not find a matching property --- backend/SearchEpc.py | 29 +++++++++++---- backend/engine/engine.py | 2 +- .../d_restart_failed_subtasks.py | 36 +++++++++++++++++++ .../epc_attributes/FloorAttributes.py | 2 ++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 9af7330b..deb17ba5 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -663,7 +663,10 @@ class SearchEpc: params["property-type"] = property_type_api_map[property_type] # We take the 20 nearest homes of the 
relevant type, so not to pull in too many irrelevant homes - epc_response = self.get_epc(params=params, size=100) + # If we get to the final iteration, we fetch more + + size = 1000 if len(postcode) <= 2 else 100 + epc_response = self.get_epc(params=params, size=size) if epc_response["status"] == 200: epc_data = pd.DataFrame(self.data["rows"]) @@ -690,6 +693,12 @@ class SearchEpc: if not epc_data.empty: epc_data = epc_data[~epc_data["mainheat-description"].str.lower().str.contains("sap05:")] + if not epc_data.empty and heating_system is not None: + # If we arrive at the final iteration, we allow ourself to be less strict on heating system + epc_data = epc_data[ + epc_data["mainheat-description"] == heating_system + ] + if not epc_data.empty: # Further processing of the EPC data @@ -747,6 +756,19 @@ class SearchEpc: estimation_built_form = "Enclosed Mid-Terrace" else: estimation_built_form = "Mid-Terrace" + elif built_form == "Detached" and property_type == "Flat": + # We add in a fallback to detached flats, where it can be rarer to see properties of this type + if len(postcode) <= 2: + if sum(epc_data["built-form"] == built_form) > 0: + estimation_built_form = built_form + elif sum(epc_data["built-form"] == "Semi-Detached") > 0: + estimation_built_form = "Semi-Detached" + elif sum(epc_data["built-form"] == "End-Terrace") > 0: + estimation_built_form = "End-Terrace" + else: + estimation_built_form = "Mid-Terrace" + else: + estimation_built_form = built_form else: estimation_built_form = built_form @@ -782,11 +804,6 @@ class SearchEpc: epc_data["property-type"] == estimation_property_type) ] - if heating_system is not None: - epc_data = epc_data[ - epc_data["mainheat-description"] == heating_system - ] - if not epc_data.empty: return epc_data # Return the filtered data if it's not empty diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f9820204..eb933cc0 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -730,7 +730,7 @@ 
async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = addr.property_type # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True, api_data=None, overwrite_sap05=True) + epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) epc_searcher.set_uprn_source(file_format=body.file_format) lookup_key = ( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 5ef901b2..a8ab230f 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -10,3 +10,39 @@ Additionally, we wil find the problematic records and remove them Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan or recommendations in case something went wrong """ +import pandas as pd +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session + + +def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: + return [ + uprn + for (uprn,) in + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + if uprn is not None + ] + + +with db_session() as session: + completed_uprns = get_uprns_for_portfolio(session, 419) + +# We now find the portfolio of the SAL, which we did not set off +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) + +missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] + +# Store +missed_properties.to_excel( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "d_failed_properties_to_restart_20260102.xlsx", + sheet_name="Standardised Asset List", + index=False +) diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 47013aaa..cd1499c2 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -133,6 +133,8 @@ class FloorAttributes(Definitions): result["another_property_below"] = ( "(another dwelling below)" in description or "(other premises below)" in description + or "another dwelling below" in description + or "other premises below" in description ) thickness_map = { From c8b717f2f8b1c832657340fbf8111b46096044f1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 3 Jan 2026 15:29:33 +0800 Subject: [PATCH 151/202] bump up concurrency to 7 --- serverless.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serverless.yml b/serverless.yml index d2d8f50a..6c042739 100644 --- a/serverless.yml +++ b/serverless.yml @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 5 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 7 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: From e072d40fa8178de400927584f0f649b91d8dcf59 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 4 Jan 2026 13:29:41 +0800 Subject: [PATCH 152/202] fixed minor bug with epcsearcher and increase concurrency to 10 --- backend/SearchEpc.py | 5 +- .../d_restart_failed_subtasks.py | 114 ++++++++++++++++++ serverless.yml | 2 +- 3 files changed, 118 insertions(+), 3 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index deb17ba5..e5bd28da 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -540,7 +540,7 @@ 
class SearchEpc: newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows) # Ge the uprn from the newest record for this home - uprns = {r["uprn"] for r in rows if r["uprn"]} + uprns = {str(r["uprn"]) for r in rows if r["uprn"]} # We can sometimes have no uprn for a property if (len(uprns) == 0) and len(rows) > 0: logger.warning("Found data but missing uprn") @@ -569,7 +569,8 @@ class SearchEpc: f"Provided UPRN {self.uprn} does not match EPC UPRN {epc_uprn}, using provided UPRN" ) # We overwrite but in this instance, we've likely got the wrong EPC data - newest_epc["uprn"] = self.uprn + # Insert as a string - same format as the raw data + newest_epc["uprn"] = str(self.uprn) if self.fast: return newest_epc, [], {}, "", "", "" diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index a8ab230f..68978b08 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -46,3 +46,117 @@ missed_properties.to_excel( sheet_name="Standardised Asset List", index=False ) + +# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios: +scenario_id = None + +from sqlalchemy import select, func +from sqlalchemy.orm import Session +from backend.app.db.models.recommendations import Plan + + +def count_plans_for_scenario(session: Session, scenario_id: int) -> int: + return session.execute( + select(func.count()) + .select_from(Plan) + .where(Plan.scenario_id == scenario_id) + ).scalar_one() + + +with db_session() as session: + n_plans = count_plans_for_scenario(session, scenario_id) + + +def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]: + result = session.execute( + select(Plan.id) + .where(Plan.scenario_id == scenario_id) + ) + return [row.id for row in 
result] + + +with db_session() as session: + plan_ids = get_plan_ids_for_scenario(session, scenario_id) + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def chunked(iterable, size): + for i in range(0, len(iterable), size): + yield iterable[i:i + size] + + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +batch_size = 25 +total = (len(plan_ids) + batch_size - 1) // batch_size + +for i, batch in enumerate(chunked(plan_ids, batch_size), start=1): + print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") + + with db_session() as session: + delete_plan_batch(session, batch) + + print(f"Batch {i} committed") diff --git a/serverless.yml b/serverless.yml index 6c042739..38d8da89 100644 --- a/serverless.yml +++ b/serverless.yml @@ 
-66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 7 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 10 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: From 10b1ef4b2a86afa1c8950618a58c8e9190ed8179 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 4 Jan 2026 13:55:15 +0800 Subject: [PATCH 153/202] handlging edge case in search epc --- backend/SearchEpc.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index e5bd28da..a633176e 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -551,7 +551,15 @@ class SearchEpc: # Take the uprn from the most recent uprns = {newest_epc["uprn"]} else: - raise ValueError("Multiple UPRNs found - investigate me") + # We check if we have UPRNs that match the one we're given and if so, filter on those + if self.uprn is not None: + uprns = {u for u in uprns if int(u) == self.uprn} + if len(uprns) == 1: + logger.info( + f"Multiple UPRNs found but one matches provided UPRN {self.uprn}, using this UPRN" + ) + else: + raise ValueError("Multiple UPRNs found - investigate me") # if uprns: # epc_uprn = uprns.pop() From eb347a4dfe38f29b514969e3767ac15ee1d7c503 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 5 Jan 2026 18:10:27 +0000 Subject: [PATCH 154/202] boosting concurrency to 12 and adding catch for OS no data --- .../e_additional_uprns.py | 145 ++++++++++++++++++ etl/spatial/OpenUprnClient.py | 14 +- serverless.yml | 2 +- 3 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py b/etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py new file mode 100644 index 
00000000..7b7ab5ac --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py @@ -0,0 +1,145 @@ +# We look to match the missed properties to the UPRNS that were sent over by Peabody +from tqdm import tqdm +import pandas as pd +import os +from utils.s3 import read_dataframe_from_s3_parquet + +cleaned_uprns = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/PeabodyPropertymatched_Dec25_propref_UPRN.xlsx" +) + +# Grab the problematic records +problematic_records = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/to_standardise_uprns.xlsx" +) +# Remove dupe on Org Ref +problematic_records = problematic_records.drop_duplicates("Org Ref") + +df = problematic_records.merge( + cleaned_uprns, + left_on="Org Ref", + right_on="reference" +) + +# df_had_uprn = df[~pd.isnull(df["UPRN"])] + +# We prepare the data for analysis +df["landlord_property_id"] = df["Org Ref"].copy() +df["domna_property_id"] = df["Org Ref"].copy() + +df = df.rename( + columns={ + "Address 1": "domna_address_1", + "Postcode": "postcode", + "Type": "landlord_property_type", + "Attachment": "landlord_built_form", + "Heating": "landlord_heating_system", + "out_uprn": "epc_os_uprn" + } +) + + +def make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + +df["domna_full_address"] = df.apply(lambda x: make_full_address(x), axis=1) + +df = df[ + [ + "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type", + "landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area (m2)", + "domna_property_id", "domna_full_address" + ] +] + +df["landlord_built_form"] = df["landlord_built_form"].map( + { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + 
"SemiDetached": "Semi-Detached", + "Detached": "Detached", + "EnclosedEndTerrace": "Enclosed End-Terrace", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + } +) + +# We have a lot of dupes - remove them +df["epc_os_uprn"].duplicated().sum() + +dupe_uprns = df[df["epc_os_uprn"].duplicated()]["epc_os_uprn"].values +dupe_df = df[df["epc_os_uprn"].isin(dupe_uprns)] +dupe_df = dupe_df.sort_values("epc_os_uprn", ascending=True) +# Remove clear duplicate UPRNs because of unreliability +df = df[~df["epc_os_uprn"].isin(dupe_uprns)] + +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx" +) +with pd.ExcelWriter(filename) as writer: + df.to_excel(writer, sheet_name="Standardised Asset List", index=False) + +# Check these are valid +# We check UPRN validity against our OS data +# uprn_filenames = read_dataframe_from_s3_parquet( +# bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet" +# ) +# +# # We're going to: +# # 1) Grab a filename +# # 2) Read it in +# # 3) Check which UPRNS from our data are in that file +# # 4) Keep a record of which UPRNS were found where +# +# for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)): +# spatial_data = read_dataframe_from_s3_parquet( +# bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}" +# ) +# +# uprns_in_file = df[ +# df['out_uprn'].astype('Int64').isin(spatial_data['UPRN'].astype('Int64').values) +# ].copy() +# +# print("Found {} UPRNS in file {}".format(len(uprns_in_file), uprn_file)) +# if len(uprns_in_file) > 0: +# # Store the found UPRNS in the validation cache +# data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy() +# data_to_store["Source File"] = uprn_file +# # Store +# data_to_store.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " +# 
f"Project/data_validation/missing_uprn_validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv", +# index=False +# ) +# +# storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " +# "Project/data_validation/missing_uprn_validation_cache") +# # List contents +# folder_contents = os.listdir(storage_locations) +# # Grab files and concatenate +# all_found_uprns = [] +# for file in folder_contents: +# if file.endswith("_found_uprns.csv"): +# df = pd.read_csv(os.path.join(storage_locations, file)) +# all_found_uprns.append(df) +# +# all_found_uprns = pd.concat(all_found_uprns) +# +# invalid = df[ +# ~df["Org Ref"].isin(all_found_uprns["Org Ref"].values) +# ] +# +# uprn_example = 10095401237 +# eg = uprn_filenames[ +# (uprn_filenames["upper"] >= uprn_example) & (uprn_filenames["lower"] <= uprn_example) +# ] +# eg2 = read_dataframe_from_s3_parquet( +# bucket_name="retrofit-data-dev", file_key=f"spatial/{eg['filenames'].values[0]}" +# ) +# +# eg2[eg2["UPRN"] == uprn_example] diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py index 36cf2d7b..8cef80b1 100644 --- a/etl/spatial/OpenUprnClient.py +++ b/etl/spatial/OpenUprnClient.py @@ -150,9 +150,21 @@ class OpenUprnClient: ) spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)] + # If this is empty, we get the nearest property + for p in input_properties: if p.uprn in associated_uprn: - p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn]) + p_spatial_df = spatial_df[spatial_df["UPRN"] == p.uprn] + if p_spatial_df.empty: + # Backup method - take the closest UPRN as a proxy + logger.info("Ordnance survey not found - faking the cloest property for a best estimation") + p_spatial_df = spatial_data.loc[ + (spatial_data["UPRN"] - p.uprn).abs().idxmin() + ].copy() + p_spatial_df["LATITUDE"], p_spatial_df["LONGITUDE"] = None, None + p_spatial_df = p_spatial_df.to_frame().T + + p.set_spatial(p_spatial_df) if p.uprn_source == 
SearchEpc.UPRN_SOURCE_SIMULATED: p.set_spatial(cls.empty_spatial_df()) diff --git a/serverless.yml b/serverless.yml index 38d8da89..f3def028 100644 --- a/serverless.yml +++ b/serverless.yml @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 10 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 12 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: From 13ba6c0159cecf2943ab0e3c527d06e751cab9b6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 5 Jan 2026 18:36:07 +0000 Subject: [PATCH 155/202] added missing heating --- backend/app/assumptions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 898f586b..97a1df76 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -91,6 +91,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1}, "Community scheme with CHP, mains gas": {"fuel": "Natural Gas", "cop": 0.85}, + "Air source heat pump, radiators and underfloor, electric": {"fuel": "Electricity", + "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it From 8803feec5e3f54a016ff42605359d0fe5c6e14a0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 5 Jan 2026 19:34:05 +0000 Subject: [PATCH 156/202] added enforce fabric first option --- backend/app/plan/schemas.py | 3 +++ backend/engine/engine.py | 4 ++-- recommendations/optimiser/funding_optimiser.py | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 6f6db328..edac31dc 100644 --- a/backend/app/plan/schemas.py 
+++ b/backend/app/plan/schemas.py @@ -133,6 +133,9 @@ class PlanTriggerRequest(BaseModel): task_id: Optional[str] = None subtask_id: Optional[str] = None + # Optional flag to trigger a fabric first task + enforce_fabric_first: Optional[bool] = False + @model_validator(mode="after") def check_indexes(self): if (self.index_start is None) != (self.index_end is None): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index eb933cc0..f6990c5c 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1066,7 +1066,7 @@ async def model_engine(body: PlanTriggerRequest): budget=body.budget, target_gain=gain, enforce_heat_pump_insulation=True, - enforce_fabric_first=False + enforce_fabric_first=body.enforce_fabric_first ) # if handle the empty case @@ -1119,7 +1119,7 @@ async def model_engine(body: PlanTriggerRequest): # Add best practice measures (ventilation/trickle vents) selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) - # Final flattening + # Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( p.id, recommendations, selected, battery_sap_score ) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 083b5e99..1d4fc682 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -681,7 +681,9 @@ def optimise_with_scenarios( # - Only once the fabric has been upgraded, do we consider heating upgrades # This should be wall insulation, roof insulation, floor insulation and windows - fabric_measures = WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES + fabric_measures = WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES + [ + "internal_wall_insulation+mechanical_ventilation", 
"external_wall_insulation+mechanical_ventilation" + ] fabric_only_measures = [ [opt for opt in group if opt["type"] in fabric_measures] for group in optimisation_measures From 45170d4724070ec76b92df9512c52d3afadca0af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 6 Jan 2026 18:35:08 +0000 Subject: [PATCH 157/202] switched off hhrsh for comunity heating in place --- backend/diagnostics/portfolio_diagnostics.py | 3 + .../f_diagnostics.py | 63 ++++++ recommendations/HeatingRecommender.py | 5 +- recommendations/Recommendations.py | 15 +- sfr/principal_pitch/2_export_data.py | 183 ++++++++++-------- 5 files changed, 186 insertions(+), 83 deletions(-) create mode 100644 backend/diagnostics/portfolio_diagnostics.py create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py diff --git a/backend/diagnostics/portfolio_diagnostics.py b/backend/diagnostics/portfolio_diagnostics.py new file mode 100644 index 00000000..bcdec24e --- /dev/null +++ b/backend/diagnostics/portfolio_diagnostics.py @@ -0,0 +1,3 @@ +""" +This script is set up to perform broad portfolio diagnostics to identify potential issues +""" diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py new file mode 100644 index 00000000..4c2a49ca --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -0,0 +1,63 @@ +""" +This script performs a deep dive into the various scenarios and checks fundamental things +This includes: +1) Do properties that should have a plan, have a plan? E.g. 
if the property is EPC D, and has a plan getting up to +# EPC C, there should be a plan +2) If the plan is fabric first, make sure they are actually fabric first +""" +import pandas as pd + +scenario_names = { + 871: "EPC C, fabric first, no solid floor, ashp 3.0", + 863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP", + 862: "EPC B, No solid floor, ASHP COP 3.0", + 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", + 859: "EPC C, no solid floor, ashp 3.0", +} + +scenario_sap_targets = { + 871: 69, + 863: 81, + 862: 81, + 861: 69, + 859: 69, +} + +for scenario_id, scenario_name in scenario_names.items(): + # Read in the recommended measures + + df = pd.read_excel( + f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + f"{scenario_name}.xlsx" + ) + + # find properties that are below the scenario sap target, but have no recommended measures + df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] + df["no_recommended_measures"] = df["sap_points"] == 0 + + problematic_properties = df[ + df["below_scenario_target"] & df["no_recommended_measures"] + ] + + # show all columns + # Source - https://stackoverflow.com/a + # Posted by YOLO, modified by community. 
See post 'Timeline' for change history + # Retrieved 2026-01-06, License - CC BY-SA 4.0 + + pd.set_option('display.max_rows', 500) + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + problematic_properties.head(len(problematic_properties)) + + # + + plan_input = [ + { + "uprn": 100022725126, + "address": "FLAT 5 Daveys Court", + "postcode": "WC2N 4BW" + } + ] + +# Plan notes: +# UPRN: 5870109770, property ID: 281244 - need to delete and re-build all scenarios diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index b1f6205c..c7c5895d 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -167,9 +167,12 @@ class HeatingRecommender: hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters + # If the property has community heating heaters in place, we don't recommend HHRSH + has_community_heating = self.property.main_fuel["is_community"] + hhr_suitable = hhr_suitable and ( "underfloor heating" not in self.property.main_heating["clean_description"] - ) + ) and not has_community_heating # If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 0b3d1635..29ba267a 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -718,7 +718,8 @@ class Recommendations: ): # Handle the case of community schemes - if (heating_description == "Community scheme") or (hotwater_description == "Community scheme") and ( + if (heating_description in ["Community scheme", 'Community scheme, plus solar']) or ( + hotwater_description in ["Community scheme", 'Community scheme, plus solar']) and ( "not community" not in main_fuel_description ): if main_fuel_description in ["mains gas (community)", "UNKNOWN"]: @@ -742,6 +743,18 @@ class Recommendations: "heating_cop": 0.85, 
"hotwater_cop": 0.85 } + + # Handling specific case + if main_fuel_description in ["To be used only when there is no heating/hot-water system"] and ( + "electric heaters" in heating_description.lower() + ): + return { + "heating_fuel_type": "Electricity", + "hotwater_fuel_type": "Electricity", + "heating_cop": 1, + "hotwater_cop": 1 + } + logger.warning( "Unhandled community fuel." f"Fuel: {main_fuel_description}" diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index d05275ea..7574414c 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -11,8 +11,21 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 404 -SCENARIOS = [829] +PORTFOLIO_ID = 419 # Peabody +SCENARIOS = [ + 871, # EPC C - fabric first, no solid floor, ashp 3.0 + 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 862, # EPC B - No solid floor, ASHP COP 3.0 + 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 859, # EPC C - no solid floor, ashp 3.0 +] +scenario_names = { + 871: "EPC C, fabric first, no solid floor, ashp 3.0", + 863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP", + 862: "EPC B, No solid floor, ASHP COP 3.0", + 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", + 859: "EPC C, no solid floor, ashp 3.0", +} def get_data(portfolio_id, scenario_ids): @@ -84,88 +97,96 @@ properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) recommendations_df = pd.DataFrame(recommendations_data) -recommended_measures_df = recommendations_df[ - ["property_id", "measure_type", "estimated_cost", "default"] -] -recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] -recommended_measures_df = recommended_measures_df.drop(columns=["default"]) - -post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] -post_install_sap = 
post_install_sap[post_install_sap["default"]] -# Sum up the sap points by property id -post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() - -# Find dupes by property id and measure type -dupes = recommended_measures_df.duplicated( - subset=["property_id", "measure_type"], keep=False -) -dupe_df = recommended_measures_df[dupes] - -if dupe_df.shape: - # Drop dupes - happened due to a funny bug - recommended_measures_df = recommended_measures_df.drop_duplicates( - subset=["property_id", "measure_type"], keep='first' - ) - -recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' -) -recommendations_measures_pivot = recommendations_measures_pivot.reset_index() - -# Total cost is the row sum, excluding the property_id column -recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( - columns=["property_id"] -).sum(axis=1) - -df = properties_df[ - [ - "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", - "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", - ] -].merge( - recommendations_measures_pivot, how="left", on="property_id" -).merge( - post_install_sap, how="left", on="property_id" -) - -df = df.drop(columns=["property_id"]) -df["sap_points"] = df["sap_points"].fillna(0) - -df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] -df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round() -df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) - -# We merge this back to the main dataframe, which will contain the bathrooms from utils.s3 import read_csv_from_s3, read_excel_from_s3 -# asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') -asset_list = read_excel_from_s3( - 
bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx", - header_row=0, sheet_name="Standardised Asset List" -) -asset_list = pd.DataFrame(asset_list) -asset_list = asset_list.rename( - columns={ - "postcode": "domna_postcode" - } -) -if "domna_full_address": - # For Peabody - asset_list["domna_full_address"] = asset_list["domna_address_1"] +# asset_list = read_excel_from_s3( +# bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx", +# header_row=0, sheet_name="Standardised Asset List" +# ) -asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() -asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) -df["uprn"] = df["uprn"].astype(str) -asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str) -asset_list = asset_list.merge( - df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), - how="left", - on="uprn" -) + +for scenario_id in SCENARIOS: + # Get recs for this scenario + recommended_measures_df = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][ + ["property_id", "measure_type", "estimated_cost", "default"] + ] + recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df.drop(columns=["default"]) + + post_install_sap = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][ + ["property_id", "default", "sap_points"]] + post_install_sap = post_install_sap[post_install_sap["default"]] + # Sum up the sap points by property id + post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index() + + # Find dupes by property id and measure type + dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False) + dupe_df = recommended_measures_df[dupes] + + if dupe_df.shape: + # Drop dupes - happened due to a funny bug + recommended_measures_df = 
recommended_measures_df.drop_duplicates( + subset=["property_id", "measure_type"], keep='first' + ) + + recommendations_measures_pivot = recommended_measures_df.pivot( + index='property_id', + columns='measure_type', + values='estimated_cost' + ) + recommendations_measures_pivot = recommendations_measures_pivot.reset_index() + + # Total cost is the row sum, excluding the property_id column + recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( + columns=["property_id"] + ).sum(axis=1) + + df = properties_df[ + [ + "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", + "heating", "windows", + "current_epc_rating", + "current_sap_points", "total_floor_area", "number_of_rooms", + ] + ].merge( + recommendations_measures_pivot, how="left", on="property_id" + ).merge( + post_install_sap, how="left", on="property_id" + ) + + df = df.drop(columns=["property_id"]) + df["sap_points"] = df["sap_points"].fillna(0) + + df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] + df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round() + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) + df["uprn"] = df["uprn"].astype(str) + + # Create excel to store to + filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + f"Project/{scenario_names[scenario_id]}.xlsx") + with pd.ExcelWriter(filename) as writer: + df.to_excel(writer, sheet_name="properties", index=False) + + +# asset_list = pd.DataFrame(asset_list) +# asset_list = asset_list.rename( +# columns={ +# "postcode": "domna_postcode" +# } +# ) +# if "domna_full_address": +# # For Peabody +# asset_list["domna_full_address"] = asset_list["domna_address_1"] +# +# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() +# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) +# 
asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str) +# asset_list = asset_list.merge( +# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), +# how="left", +# on="uprn" +# ) # Get conservation area data from property details spatial. based on the UPRNs From 608ff71d357961463ce3e4f5c6ee3f02120435c6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 6 Jan 2026 20:58:13 +0000 Subject: [PATCH 158/202] minor tweaks and bug fixes for properties that failed diagnostic tests --- .../f_diagnostics.py | 91 ++++++++++++++++--- recommendations/HeatingRecommender.py | 11 ++- recommendations/WindowsRecommendations.py | 17 +++- 3 files changed, 102 insertions(+), 17 deletions(-) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index 4c2a49ca..fa7383a2 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -23,9 +23,10 @@ scenario_sap_targets = { 859: 69, } +problems = [] for scenario_id, scenario_name in scenario_names.items(): # Read in the recommended measures - + print("Reading") df = pd.read_excel( f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" f"{scenario_name}.xlsx" @@ -34,30 +35,98 @@ for scenario_id, scenario_name in scenario_names.items(): # find properties that are below the scenario sap target, but have no recommended measures df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] df["no_recommended_measures"] = df["sap_points"] == 0 + df["zero_cost"] = df["total_retrofit_cost"] == 0 + df["sap_points_above_zero"] = df["sap_points"] > 0 + + # Also look for zero cost and SAP points > 0 problematic_properties = df[ - df["below_scenario_target"] & df["no_recommended_measures"] - ] + (df["below_scenario_target"] & 
df["no_recommended_measures"]) + ].copy() + + if scenario_sap_targets[scenario_id] == 81: + problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"] + + zero_cost_above_zero_sap = df[ + (df["sap_points_above_zero"] & df["zero_cost"]) + ].copy() # show all columns # Source - https://stackoverflow.com/a # Posted by YOLO, modified by community. See post 'Timeline' for change history # Retrieved 2026-01-06, License - CC BY-SA 4.0 - pd.set_option('display.max_rows', 500) - pd.set_option('display.max_columns', 500) - pd.set_option('display.width', 1000) - problematic_properties.head(len(problematic_properties)) + # pd.set_option('display.max_rows', 500) + # pd.set_option('display.max_columns', 500) + # pd.set_option('display.width', 1000) + # problematic_properties.head(len(problematic_properties)) - # + print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})") + print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})") + + problems.append(problematic_properties) + problems.append(zero_cost_above_zero_sap) + + # plan_input = [ + # { + # "uprn": 100022725126, + # "address": "FLAT 5 Daveys Court", + # "postcode": "WC2N 4BW" + # } + # ] + + # plan_input = [ + # { + # "uprn": 100120966352, + # "address": "FLAT 11 Kingsgate", + # "postcode": "OX18 2BP" + # } + # ] plan_input = [ { - "uprn": 100022725126, - "address": "FLAT 5 Daveys Court", - "postcode": "WC2N 4BW" + "uprn": 200003371857, + "postcode": "SE1 5SJ", + "address": "39 BUTTERMERE CLOSE", } ] +all_problems = pd.concat(problems) +all_problems = all_problems.drop_duplicates(subset=["uprn"]) + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) +sal2 = pd.read_excel( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx", + sheet_name="Standardised Asset List" +) + +sal = pd.concat([sal, sal2]) + +retry = sal[sal["epc_os_uprn"].isin(all_problems["uprn"])] + +# Store +retry.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "d_problematic_properties_to_review_20260106.xlsx", + sheet_name="Standardised Asset List", + index=False +) + +# Delete associated plans +# 1) Get the property IDs for these UPRNS, for this portfolio +portfolio_id = 419 +uprns = retry + +# TODO: Delete all plans for these properties and re-build # Plan notes: # UPRN: 5870109770, property ID: 281244 - need to delete and re-build all scenarios +# UPRN: 100022725126, property ID: 283781 - need to delete and re-build all scenarios + + +# Bugs: +12156800 diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index c7c5895d..fdc25bf9 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -170,9 +170,14 @@ class HeatingRecommender: # If the property has community heating heaters in place, we don't recommend HHRSH has_community_heating = self.property.main_fuel["is_community"] - hhr_suitable = hhr_suitable and ( - "underfloor heating" not in self.property.main_heating["clean_description"] - ) and not has_community_heating + # If the property currently has electric underfloor heating, we allow this if there is elecric immersion + # hot water heating + underfloor_not_an_issue = True + if self.property.main_heating["has_electric_underfloor_heating"]: + if self.property.hotwater["heater_type"] != "electric immersion": + underfloor_not_an_issue = False + + hhr_suitable = hhr_suitable and not has_community_heating and underfloor_not_an_issue # If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH diff --git 
a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index 8bdab5d1..7b85ac49 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -86,9 +86,17 @@ class WindowsRecommendations: # We scale the number of windows based on the proportion of existing glazing if self.property.data["multi-glaze-proportion"] != "": - n_windows_scalar = 1 - ( - int(self.property.data["multi-glaze-proportion"]) / 100 - ) + + if (self.property.windows["clean_description"] == "Some double glazing") and ( + self.property.data["windows-energy-eff"] == "Very Poor") and ( + self.property.data["multi-glaze-proportion"] == 100 + ): + # In this case, we assume all of the dinwos need replacing + n_windows_scalar = 1 + else: + n_windows_scalar = 1 - ( + int(self.property.data["multi-glaze-proportion"]) / 100 + ) else: n_windows_scalar = self.COVERAGE_MAP.get( self.property.windows["glazing_coverage"], 1 @@ -97,6 +105,9 @@ class WindowsRecommendations: number_of_windows *= n_windows_scalar number_of_windows = np.ceil(number_of_windows) + # Handle edge case - prevent number of windows 0 + number_of_windows = max(1, number_of_windows) + # We then price the job based on the number of windows that there are cost_result = self.costs.window_glazing( number_of_windows=number_of_windows, From b2d07cfd7c81f5b301d4efd0700f6afb1299fd82 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 7 Jan 2026 20:40:37 +0000 Subject: [PATCH 159/202] preparing the already installed code for Peabody --- backend/app/db/models/portfolio.py | 4 + backend/app/db/models/recommendations.py | 55 ++ .../f_diagnostics.py | 126 +++- .../g_rebaselining_installed_measrues.py | 714 ++++++++++++++++++ sfr/principal_pitch/2_export_data.py | 134 +++- 5 files changed, 998 insertions(+), 35 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py diff --git 
a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index ea9f9976..e17e5856 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -106,6 +106,10 @@ class PropertyModel(Base): current_epc_rating = Column(Enum(Epc)) current_sap_points = Column(Float) current_valuation = Column(Float) + # Following fields are for recording already installed adjustments to a property's SAP + installed_measures_sap_point_adjustment = Column(Float) + is_sap_points_adjusted_for_installed_measures = Column(Boolean, default=False) + original_sap_points = Column(Float) class FeatureRating(enum.Enum): diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 800596ec..ed1fcefa 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -146,3 +146,58 @@ class Scenario(Base): valuation_return_on_investment = Column(String) property_valuation_increase = Column(Float) labour_days = Column(Float) + + +class MeasureType(enum.Enum): + air_source_heat_pump = "air_source_heat_pump" + boiler_upgrade = "boiler_upgrade" + high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters" + secondary_heating = "secondary_heating" + + roomstat_programmer_trvs = "roomstat_programmer_trvs" + time_temperature_zone_control = "time_temperature_zone_control" + cylinder_thermostat = "cylinder_thermostat" + + cavity_wall_insulation = "cavity_wall_insulation" + extension_cavity_wall_insulation = "extension_cavity_wall_insulation" + external_wall_insulation = "external_wall_insulation" + internal_wall_insulation = "internal_wall_insulation" + loft_insulation = "loft_insulation" + flat_roof_insulation = "flat_roof_insulation" + room_roof_insulation = "room_roof_insulation" + solid_floor_insulation = "solid_floor_insulation" + suspended_floor_insulation = "suspended_floor_insulation" + + double_glazing = "double_glazing" + secondary_glazing = 
"secondary_glazing" + draught_proofing = "draught_proofing" + + mechanical_ventilation = "mechanical_ventilation" + low_energy_lighting = "low_energy_lighting" + solar_pv = "solar_pv" + hot_water_tank_insulation = "hot_water_tank_insulation" + sealing_open_fireplace = "sealing_open_fireplace" + + +class InstalledMeasure(Base): + __tablename__ = "installed_measure" + + id = Column(BigInteger, primary_key=True, autoincrement=True) + uprn = Column(BigInteger, nullable=False) + measure_type = Column( + Enum( + MeasureType, + name="measure_type", + values_callable=lambda e: [m.value for m in e], + create_type=False, # <-- critical + ), + nullable=False, + ) + installed_at = Column(TIMESTAMP) + sap_points = Column(Float) + carbon_savings = Column(Float) + kwh_savings = Column(Float) + bill_savings = Column(Float) + heat_demand_savings = Column(Float) + source = Column(String) + is_active = Column(Boolean, nullable=False, default=True) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index fa7383a2..4b946c60 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -120,13 +120,127 @@ retry.to_excel( # Delete associated plans # 1) Get the property IDs for these UPRNS, for this portfolio portfolio_id = 419 -uprns = retry +uprns = retry["epc_os_uprn"].tolist() # TODO: Delete all plans for these properties and re-build -# Plan notes: -# UPRN: 5870109770, property ID: 281244 - need to delete and re-build all scenarios -# UPRN: 100022725126, property ID: 283781 - need to delete and re-build all scenarios +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session +from backend.app.db.models.recommendations import Plan +from sqlalchemy import select, delete +from sqlalchemy.exc import NoResultFound 
+from sqlalchemy.orm import sessionmaker -# Bugs: -12156800 +def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: + return [ + property.id + for property in session.query(PropertyModel) + .filter( + PropertyModel.portfolio_id == portfolio_id, + PropertyModel.uprn.in_(uprns) + ) + .all() + ] + + +with db_session() as session: + property_ids_to_delete = get_property_ids_for_uprns(session, portfolio_id, uprns) + + +# Get all and delete plans for these property IDs +def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: + return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() + + +def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: + return [ + plan.id + for plan in session.query(Plan) + .filter(Plan.property_id.in_(property_ids)) + .all() + ] + + +with db_session() as session: + plan_ids_to_delete = get_ids_of_plans_for_deletion(session, property_ids_to_delete) + + +def chunked(iterable, size): + for i in range(0, len(iterable), size): + yield iterable[i:i + size] + + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # 
---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +batch_size = 25 +total = (len(plan_ids_to_delete) + batch_size - 1) // batch_size + +for i, batch in enumerate(chunked(plan_ids_to_delete, batch_size), start=1): + print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") + + with db_session() as session: + delete_plan_batch(session, batch) + + print(f"Batch {i} committed") diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py new file mode 100644 index 00000000..bc628630 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -0,0 +1,714 @@ +import pandas as pd +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine, db_read_session, db_session +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ + InstalledMeasure +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from sqlalchemy import func +from backend.app.utils import sap_to_epc +from typing import Dict, List, Set +from recommendations.Costs import Costs +from backend.app.db.models.portfolio import Epc + + +def get_all_data(portfolio_id, scenario_ids): + session = sessionmaker(bind=db_engine)() + session.begin() + + # -------------------- + # Properties + # -------------------- + properties_query = session.query( + PropertyModel, + PropertyDetailsEpcModel + ).join( + PropertyDetailsEpcModel, + PropertyModel.id == 
PropertyDetailsEpcModel.property_id + ).filter( + PropertyModel.portfolio_id == portfolio_id + ).all() + + properties_data = [ + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query + ] + + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() + + plans_data = [ + {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + for plan in plans_query + ] + + plan_ids = [p["id"] for p in plans_data] + + # -------------------- + # Recommendations (NO materials yet) + # -------------------- + recommendations_query = session.query( + Recommendation, + Plan.scenario_id + ).join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id + ).join( + Plan, + Plan.id == PlanRecommendations.plan_id + ).filter( + PlanRecommendations.plan_id.in_(plan_ids), + ).all() + + recommendations_data = [ + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query + ] + + session.close() + + return properties_data, plans_data, recommendations_data + + +PORTFOLIO_ID = 419 # Peabody +SCENARIOS = [ + # 871, # EPC C - fabric first, no solid floor, ashp 3.0 + # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 862, # EPC B - No solid floor, ASHP COP 3.0 + # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 +] + +# properties_data, plans_data, recommendations_data = get_all_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) +# # Store this data as dataframes for analysis +# properties_df = 
pd.DataFrame(properties_data) +# plans_df = pd.DataFrame(plans_data) +# recommendations_df = pd.DataFrame(recommendations_data) + +# Save CSVs +# properties_df.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" +# "f_peabody_properties_data_20260108.csv", +# index=False +# ) +# plans_df.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" +# "f_peabody_plans_data_20260108.csv", +# index=False +# ) +# recommendations_df.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" +# "f_peabody_recommendations_data_20260108.csv", +# index=False +# ) +# Read csvs +properties_df = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "f_peabody_properties_data_20260108.csv" +) +plans_df = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "f_peabody_plans_data_20260108.csv" +) +recommendations_df = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "f_peabody_recommendations_data_20260108.csv" +) + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# recommendations_df = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/EPC B, " +# "No solid floor, ASHP COP 3.0.xlsx" +# ) + +# recommendations_df2 = recommendations_df2.merge( +# properties_df[["id", "uprn"]], +# left_on="property_id", +# right_on="id", +# how="left" +# ).rename(columns={"id_x": "id"}).drop(columns=["id_y"]) +# recommendations_df["uprn"] = recommendations_df["uprn"].astype(int).astype(str) + +# We just need all of the measure types, per property 
+recommendation_measure_types = recommendations_df[["property_id", "measure_type"]].drop_duplicates() +recommendation_measure_types["flag"] = True + +# We pivot +recommendations_measures_pivot = recommendation_measure_types.pivot( + index='property_id', + columns='measure_type', + values='flag' +) +recommendations_measures_pivot = recommendations_measures_pivot.reset_index() + +# Create a total cost column +recommendations_total_cost = recommendations_df.groupby("property_id")["estimated_cost"].sum().reset_index() +recommendations_measures_pivot = recommendations_measures_pivot.merge( + recommendations_total_cost, how="left", on="property_id" +) + +properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).merge( + recommendations_measures_pivot, how="left", left_on="id", right_on="property_id" +) +properties_to_recs["estimated_cost"] = properties_to_recs["estimated_cost"].fillna(0) + +sustainability_data["has_cavity_insulation"] = sustainability_data["Wall Insulation"].isin( + ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] +) +sustainability_data["has_iwi"] = sustainability_data["Wall Insulation"].isin( + ["Internal", "FilledCavityPlusInternal"] +) +sustainability_data["has_ewi"] = sustainability_data["Wall Insulation"].isin( + ["External", "FilledCavityPlusExternal"] +) +sustainability_data["has_loft_insulation"] = sustainability_data["Roof Insulation"].isin( + ["mm300", "mm250"] +) +sustainability_data["has_glazing"] = sustainability_data["Glazing"].isin( + ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] +) + +sustainability_data["has_floor_insulation"] = sustainability_data["Floor Insulation"].isin( + ["RetroFitted"] +) + +sustainability_data["has_efficient_boiler"] = ( + sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) +) +sustainability_data["has_ashp"] = (sustainability_data["Heating"].isin(["Heat 
pumps (wet)"])) + +sustainability_data["has_top_heat_controls"] = ( + sustainability_data["Controls Adequacy"].isin(["Top Spec"]) +) + +sustainability_data["has_optimal_heat_controls"] = ( + sustainability_data["Controls Adequacy"].isin(["Optimal"]) +) +sustainability_data["has_flat_roof_insulation"] = ( + (sustainability_data["Roof Construction"] == "Flat") & + (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) +) + +properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str) +comparison = sustainability_data.merge( + properties_to_recs[ + ["uprn", "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", "loft_insulation", + "double_glazing", "secondary_glazing", "suspended_floor_insulation", "boiler_upgrade", "air_source_heat_pump", + "time_temperature_zone_control", "roomstat_programmer_trvs", "flat_roof_insulation", "room_roof_insulation" + ] + ], + left_on="UPRN", + right_on="uprn", + how="left" +) + +# Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation +# ------------ Walls ------------ +comparison["conflict_cavity_wall_insulation"] = ( + (comparison["has_cavity_insulation"]) & + (pd.isnull(comparison["cavity_wall_insulation"]) == False) +) +comparison["conflict_iwi_wall_insulation"] = ( + (comparison["has_iwi"]) & + (pd.isnull(comparison["internal_wall_insulation"]) == False) +) +comparison["conflict_ewi_wall_insulation"] = ( + (comparison["has_ewi"]) & + (pd.isnull(comparison["internal_wall_insulation"]) == False) +) + +cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] +iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] +ewi_conflicting = comparison[comparison["conflict_ewi_wall_insulation"] == True] + +# ------------ Roof ------------ +comparison["conflict_roof_insulation"] = ( + (comparison["has_loft_insulation"]) & + (pd.isnull(comparison["loft_insulation"]) == False) +) 
+ +loft_conflicting = comparison[comparison["conflict_roof_insulation"] == True] + +# ------------ Windows ------------ +comparison["conflict_double_glazing"] = ( + (comparison["has_glazing"]) & + ( + (pd.isnull(comparison["double_glazing"]) == False) | (pd.isnull(comparison["secondary_glazing"]) == False) + ) +) +windows_conflicting = comparison[comparison["conflict_double_glazing"] == True] + +# ------------ Floors ------------ +comparison["conflict_suspended_floor_insulation"] = ( + (comparison["has_floor_insulation"]) & + (pd.isnull(comparison["suspended_floor_insulation"]) == False) +) +floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] + +# ------------ Boiler Upgrade ------------ +comparison["conflict_boiler_upgrade"] = ( + (comparison["has_efficient_boiler"]) & + (pd.isnull(comparison["boiler_upgrade"]) == False) +) +boiler_conflicting = comparison[comparison["conflict_boiler_upgrade"] == True] + +# ------------ ASHP ------------ +comparison["conflict_air_source_heat_pump"] = ( + (comparison["has_ashp"]) & + (pd.isnull(comparison["air_source_heat_pump"]) == False) +) +ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] + +# ------------ heat controls ------------ +comparison["conflict_time_temperature_zone_control"] = ( + (comparison["has_top_heat_controls"]) & + (pd.isnull(comparison["time_temperature_zone_control"]) == False) +) +comparison["conflict_roomstat_programmer_trvs"] = ( + (comparison["has_optimal_heat_controls"]) & + (pd.isnull(comparison["roomstat_programmer_trvs"]) == False) +) +ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] +rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] + +# ------------ Flat Roof Insulation ----------- +comparison["conflict_flat_roof_insulation"] = ( + (comparison["has_flat_roof_insulation"]) & + (pd.isnull(comparison["flat_roof_insulation"]) == False) +) 
+flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] + +# All properties with conflicts +all_conflicts = pd.concat( + [ + cwi_conflicting, + iwi_conflicting, + ewi_conflicting, + loft_conflicting, + windows_conflicting, + floors_conflicting, + boiler_conflicting, + ashp_conflicting, + ttzc_conflicting, + rst_conflicting, + flat_roof_conflicting + ] +) + +all_conflicts["UPRN"].nunique() + + +# What do I need to do: +# TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking +# at one scenario +# 1) I should store the current recommendations table, for the portfolio as a backup +# 2) I need a total of already installed SAP points for each property. This should probably be stored on the +# property_details_epc tabe +# 3) For anything already installed, I should mark already installed as True, and set the cost to zero +# 4) I need to update the plan cost to remove the cost of the installed measures + + +### Rebaselining + + +def get_installed_sap_adjustments_by_uprn_for_portfolio( + session, + portfolio_id: int, +) -> Dict[int, float]: + """ + Returns { uprn -> total_sap_delta } + """ + + uprn_subquery = ( + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.uprn.isnot(None)) + .subquery() + ) + + rows = ( + session.query( + InstalledMeasure.uprn, + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0), + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.uprn.in_(uprn_subquery)) + .group_by(InstalledMeasure.uprn) + .all() + ) + + return {uprn: float(delta) for uprn, delta in rows} + + +def get_installed_measure_types_by_uprn( + session, + uprn: int, +) -> Set[str]: + rows = ( + session.query(InstalledMeasure.measure_type) + .filter(InstalledMeasure.uprn == uprn) + .filter(InstalledMeasure.is_active.is_(True)) + .all() + ) + + # Convert enums → strings + return { + r[0].value if 
hasattr(r[0], "value") else r[0] + for r in rows + } + + +# ------------------------------------------------------------ +# PROPERTY REBASING (READ-ONLY) +# ------------------------------------------------------------ + +def compute_property_sap_updates( + properties: List[PropertyModel], + sap_adjustments: Dict[int, float], +) -> List[dict]: + """ + Returns property SAP rebasing results. + Does NOT mutate DB objects. + """ + + updates = [] + + for prop in properties: + if prop.uprn is None or prop.original_sap_points is None: + continue + + sap_delta = sap_adjustments.get(prop.uprn, 0.0) + new_sap = prop.original_sap_points + sap_delta + + updates.append({ + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": sap_delta != 0, + }) + + return updates + + +# ------------------------------------------------------------ +# PLAN RECOMPUTATION HELPERS +# ------------------------------------------------------------ + +def get_effective_plan_recommendations( + session, + plan_id: int, + excluded_measure_types: Set[str], +) -> List[Recommendation]: + q = ( + session.query(Recommendation) + .join(PlanRecommendations) + .filter(PlanRecommendations.plan_id == plan_id) + .filter(Recommendation.default.is_(True)) + ) + + if excluded_measure_types: + q = q.filter( + ~Recommendation.measure_type.in_(excluded_measure_types) + ) + + return q.all() + + +def aggregate_plan_metrics(recommendations: list[Recommendation]): + agg = { + "sap_points": 0.0, + "co2_savings": 0.0, + "energy_bill_savings": 0.0, + "energy_consumption_savings": 0.0, + "valuation_increase": 0.0, + "cost_of_works": 0.0, + "contingency_cost": 0.0, + } + + for r in recommendations: + agg["sap_points"] += r.sap_points or 0.0 + agg["co2_savings"] += r.co2_equivalent_savings or 0.0 + agg["energy_bill_savings"] += r.energy_cost_savings or 0.0 + agg["energy_consumption_savings"] += r.energy_savings 
or 0.0 + agg["valuation_increase"] += r.property_valuation_increase or 0.0 + + base_cost = r.estimated_cost or 0.0 + agg["cost_of_works"] += base_cost + agg["contingency_cost"] += calculate_contingency_for_recommendation(r) + + return agg + + +# ------------------------------------------------------------ +# PLAN REBASING (READ-ONLY) +# ------------------------------------------------------------ + +def compute_plan_updates( + session, + plans: List[Plan], + properties_by_id: Dict[int, PropertyModel], + epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], + property_sap_updates: Dict[int, dict], +) -> List[dict]: + """ + Computes plan metrics assuming properties are already rebased. + """ + + updates = [] + + for plan in plans: + prop = properties_by_id.get(plan.property_id) + epc = epcs_by_property_id.get(plan.property_id) + prop_update = property_sap_updates.get(plan.property_id) + + if not prop or not epc or not prop_update: + continue + + installed_types = get_installed_measure_types_by_uprn( + session, prop.uprn + ) + + future_recs = get_effective_plan_recommendations( + session, + plan.id, + installed_types, + ) + + metrics = aggregate_plan_metrics(future_recs) + + baseline_bill = ( + epc.heating_cost_current + + epc.hot_water_cost_current + + epc.lighting_cost_current + + epc.appliances_cost_current + + epc.gas_standing_charge + + epc.electricity_standing_charge + ) + + post_sap = prop_update["new_sap_points"] + metrics["sap_points"] + + updates.append({ + "plan_id": plan.id, + "property_id": plan.property_id, + + # SAP / EPC + "post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + + # Carbon + "co2_savings": metrics["co2_savings"], + "post_co2_emissions": ( + epc.co2_emissions - metrics["co2_savings"] + if epc.co2_emissions is not None + else None + ), + + # Energy bills + "energy_bill_savings": metrics["energy_bill_savings"], + "post_energy_bill": baseline_bill - metrics["energy_bill_savings"], + + # Energy consumption + 
"energy_consumption_savings": metrics["energy_consumption_savings"], + "post_energy_consumption": ( + epc.primary_energy_consumption + - metrics["energy_consumption_savings"] + ), + + # Valuation + "valuation_increase": metrics["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + metrics["valuation_increase"] + if prop.current_valuation is not None + else None + ), + + # Costs + "cost_of_works": metrics["cost_of_works"], + "contingency_cost": metrics["contingency_cost"], + }) + + return updates + + +def calculate_contingency_for_recommendation( + recommendation, +) -> float: + """ + Recompute contingency for a recommendation using the same + logic as the costing engine. + + Assumptions: + - recommendation.estimated_cost is the 'total' cost + - contingency is a percentage of total + """ + + if recommendation.estimated_cost is None: + return 0.0 + + # Normalise measure_type (Enum → str) + measure_type = ( + recommendation.measure_type.value + if hasattr(recommendation.measure_type, "value") + else recommendation.measure_type + ) + + # Measure-specific contingency if defined, else global fallback + contingency_rate = Costs.CONTINGENCIES.get( + measure_type, + Costs.CONTINGENCY, # default (e.g. 10%) + ) + + return recommendation.estimated_cost * contingency_rate + + +def persist_property_sap_updates( + property_updates_by_id: dict[int, dict], +): + """ + Writes adjusted SAP values back to property table. + Safe to re-run. 
+ """ + + with db_session() as session: + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.id.in_(property_updates_by_id.keys())) + .all() + ) + + for prop in properties: + update = property_updates_by_id[prop.id] + + prop.installed_measures_sap_point_adjustment = update["installed_sap_delta"] + prop.is_sap_points_adjusted_for_installed_measures = update["is_adjusted"] + prop.current_sap_points = update["new_sap_points"] + prop.current_epc_rating = sap_to_epc(update["new_sap_points"]) + + print(f"✅ Updated {len(properties)} properties") + + +def persist_plan_updates(plan_updates: list[dict]): + """ + Writes recalculated plan metrics. + Safe to re-run. + """ + + with db_session() as session: + plans = ( + session.query(Plan) + .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + .all() + ) + + plans_by_id = {p.id: p for p in plans} + + for update in plan_updates: + plan = plans_by_id.get(update["plan_id"]) + if not plan: + continue + + # SAP / EPC + plan.post_sap_points = update["post_sap_points"] + plan.post_epc_rating = Epc(update["post_epc_rating"]) + + # Carbon + plan.co2_savings = update["co2_savings"] + plan.post_co2_emissions = update["post_co2_emissions"] + + # Energy + plan.energy_bill_savings = update["energy_bill_savings"] + plan.post_energy_bill = update["post_energy_bill"] + + plan.energy_consumption_savings = update["energy_consumption_savings"] + plan.post_energy_consumption = update["post_energy_consumption"] + + # Valuation + plan.valuation_increase = update["valuation_increase"] + plan.valuation_post_retrofit = update["valuation_post_retrofit"] + + # Costs + plan.cost_of_works = update["cost_of_works"] + plan.contingency_cost = update["contingency_cost"] + + print(f"✅ Updated {len(plans)} plans") + + +# ------------------------------------------------------------ +# EXECUTION (DRY RUN) +# ------------------------------------------------------------ + +PORTFOLIO_ID = 430 +# TODO - run the original sap points update on 
the peabody portfolio + +with db_read_session() as session: + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + .all() + ) + + plans = ( + session.query(Plan) + .filter(Plan.portfolio_id == PORTFOLIO_ID) + .all() + ) + + epcs = { + e.property_id: e + for e in ( + session.query(PropertyDetailsEpcModel) + .join(PropertyModel) + .filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + .all() + ) + } + + sap_adjustments = get_installed_sap_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, + ) + + property_updates = compute_property_sap_updates( + properties, + sap_adjustments, + ) + + property_updates_by_id = { + u["property_id"]: u + for u in property_updates + } + + properties_by_id = {p.id: p for p in properties} + + plan_updates = compute_plan_updates( + session, + plans, + properties_by_id, + epcs, + property_updates_by_id, + ) + +# When ready to run! +persist_property_sap_updates(property_updates_by_id) +persist_plan_updates(plan_updates) diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 7574414c..89c29ce4 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -3,11 +3,14 @@ This script prepares the data for the financial model """ import pandas as pd +import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker -from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.connection import db_engine, db_read_session +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial +from backend.app.db.functions.materials_functions import get_materials +from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = 
[389] @@ -18,6 +21,7 @@ SCENARIOS = [ 862, # EPC B - No solid floor, ASHP COP 3.0 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 ] scenario_names = { 871: "EPC C, fabric first, no solid floor, ashp 3.0", @@ -25,6 +29,7 @@ scenario_names = { 862: "EPC B, No solid floor, ASHP COP 3.0", 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", 859: "EPC C, no solid floor, ashp 3.0", + 885: "EPC B, fabric first, no solid floor, ashp 3.0" } @@ -32,60 +37,97 @@ def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() session.begin() - # Get properties and their details for a specific portfolio + # -------------------- + # Properties + # -------------------- properties_query = session.query( PropertyModel, PropertyDetailsEpcModel ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID + PropertyModel.portfolio_id == portfolio_id ).all() - # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} - for prop in properties_query + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query ] - # Get property IDs from fetched properties + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() - # Get plans linked to the fetched properties 
- plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() - - # Transform plans data to include all fields dynamically plans_data = [ {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} for plan in plans_query ] - # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [p["id"] for p in plans_data] - # Get recommendations through PlanRecommendations for those plans and that are default + # -------------------- + # Recommendations (NO materials yet) + # -------------------- recommendations_query = session.query( Recommendation, Plan.scenario_id ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id + Plan, + Plan.id == PlanRecommendations.plan_id ).filter( PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations + Recommendation.default.is_(True) ).all() - # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} - for rec in recommendations_query + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query ] + recommendation_ids = [r["id"] for r in recommendations_data] + + # -------------------- + # Recommendation materials (SEPARATE QUERY) + # -------------------- + materials_query = session.query( + RecommendationMaterials + ).filter( + 
RecommendationMaterials.recommendation_id.in_(recommendation_ids) + ).all() + + # Group materials by recommendation_id + materials_by_recommendation = defaultdict(list) + + for m in materials_query: + materials_by_recommendation[m.recommendation_id].append({ + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + }) + + # Attach materials safely (no filtering side effects) + for r in recommendations_data: + r["materials"] = materials_by_recommendation.get(r["id"], []) + session.close() return properties_data, plans_data, recommendations_data @@ -97,6 +139,40 @@ properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) recommendations_df = pd.DataFrame(recommendations_data) +with db_read_session() as session: + materials = get_materials(session) + +materials = pd.DataFrame(materials) + +material_lookup = ( + materials + .set_index("id")[["type", "includes_battery"]] + .to_dict("index") +) + + +def has_solar_with_battery(materials_list): + for m in materials_list or []: + mat = material_lookup.get(m["material_id"]) + if not mat: + continue + if mat["type"] == "solar_pv" and mat["includes_battery"]: + return True + return False + + +recommendations_df["has_solar_with_battery"] = ( + recommendations_df["materials"].apply(has_solar_with_battery) +) + +recommendations_df["measure_type"] = np.where( + recommendations_df["has_solar_with_battery"] == True, + recommendations_df["measure_type"] + "_with_battery", + recommendations_df["measure_type"] +) + +# Adjust material type to indicate if there is a battery included + from utils.s3 import read_csv_from_s3, read_excel_from_s3 # asset_list = read_excel_from_s3( @@ -107,13 +183,13 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3 for scenario_id in SCENARIOS: # Get recs for this scenario - recommended_measures_df = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][ + 
recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ ["property_id", "measure_type", "estimated_cost", "default"] ] recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) - post_install_sap = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][ + post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ ["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id From fb6fca896627dd932ac157ca504586b926024052 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 12:18:40 +0000 Subject: [PATCH 160/202] added additional loggging into engine --- .../db/functions/recommendations_functions.py | 8 +- backend/app/db/models/portfolio.py | 12 + backend/engine/engine.py | 8 + .../g_rebaselining_installed_measrues.py | 405 +++++++++++++++--- .../h_reset_estimated_epcs.py | 156 +++++++ 5 files changed, 515 insertions(+), 74 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 726e919c..ae178c8a 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,15 +1,11 @@ from sqlalchemy import text -from sqlalchemy import insert, delete, select +from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario ) -from backend.app.db.models.portfolio import ( - PropertyModel, PropertyTargetsModel, PropertyDetailsEpcModel -) -from backend.app.db.models.funding 
import FundingPackageMeasures, FundingPackage -from backend.app.db.models.inspections import InspectionModel +from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index e17e5856..d151bdc4 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -192,6 +192,18 @@ class PropertyDetailsEpcModel(Base): gas_standing_charge = Column(Float) electricity_standing_charge = Column(Float) + # Columns for re-baselining if we have an already installed measure + original_co2_emissions = Column(Float) + original_primary_energy_consumption = Column(Float) + original_current_energy_demand = Column(Float) + original_current_energy_demand_heating_hotwater = Column(Float) + # Adjustments + installed_measures_co2_adjustment = Column(Float) + installed_measures_energy_demand_adjustment = Column(Float) + installed_measures_total_energy_bill_adjustment = Column(Float) + installed_measures_heat_demand_adjustment = Column(Float) + is_epc_adjusted_for_installed_measures = Column(Boolean, default=False) + class PropertyDetailsSpatial(Base): __tablename__ = "property_details_spatial" diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f6990c5c..9a9c30a2 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -535,12 +535,14 @@ async def model_engine(body: PlanTriggerRequest): logger.info("Getting the inputs") if body.file_type == "xlsx": + logger.info("Getting the plan input") plan_input = read_excel_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, file_key=body.trigger_file_path, sheet_name=body.sheet_name, header_row=0, ) + logger.into("Got the plan input from excel") # We now handle the case where the input data is a Domna standardised assset list if body.file_format == "domna_asset_list": @@ -619,9 +621,11 @@ async def model_engine(body: PlanTriggerRequest): 
raise ValueError("Other formats not yet supported") else: + logger.info("Getting the plan input from csv") plan_input = read_csv_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path ) + logger.info("Got the plan input from csv") # We then slide it on the indexes if they are provided if body.index_start is not None and body.index_end is not None: @@ -640,12 +644,14 @@ async def model_engine(body: PlanTriggerRequest): if "domna_valuation" in plan_input[0]: valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input] + logger.info("Getting cleaning_data") cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) # Prepare input data addresses = Addresses.from_plan_input(plan_input, body) + logger.info("Checking database for existing properties") uprns = addresses.get_uprns() landlord_ids = addresses.get_landlord_ids() @@ -670,6 +676,7 @@ async def model_engine(body: PlanTriggerRequest): if key not in property_lookup: to_create.append(addr) + logger.info("Checking database for EPC cache") # Pre-requests to the db with db_read_session() as session: epc_cache_by_uprn = db_funcs.epc_functions.EpcStoreService.get_epcs_for_uprns(session, uprns) @@ -679,6 +686,7 @@ async def model_engine(body: PlanTriggerRequest): ) # If we have properties that need to be created, we cerate them in bulk + logger.info("Determine new properties to be created") new_property_ids = set() if to_create: logger.info("Creating %d new properties", len(to_create)) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index bc628630..d310ffa4 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting 
Project/g_rebaselining_installed_measrues.py @@ -10,6 +10,10 @@ from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc +pd.set_option('display.max_rows', 500) +pd.set_option('display.max_columns', 500) +pd.set_option('display.width', 1000) + def get_all_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() @@ -140,70 +144,65 @@ sustainability_data = pd.read_excel( # "No solid floor, ASHP COP 3.0.xlsx" # ) -# recommendations_df2 = recommendations_df2.merge( -# properties_df[["id", "uprn"]], -# left_on="property_id", -# right_on="id", -# how="left" -# ).rename(columns={"id_x": "id"}).drop(columns=["id_y"]) -# recommendations_df["uprn"] = recommendations_df["uprn"].astype(int).astype(str) - # We just need all of the measure types, per property -recommendation_measure_types = recommendations_df[["property_id", "measure_type"]].drop_duplicates() +recommendation_measure_types = recommendations_df[ + ["property_id", "measure_type" + , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", + "energy_cost_savings" + ] +].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types.pivot( +recommendations_measures_pivot = recommendation_measure_types[ + ["property_id", "measure_type", "flag"] +].drop_duplicates().pivot( index='property_id', columns='measure_type', values='flag' ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() -# Create a total cost column -recommendations_total_cost = recommendations_df.groupby("property_id")["estimated_cost"].sum().reset_index() -recommendations_measures_pivot = recommendations_measures_pivot.merge( - recommendations_total_cost, how="left", on="property_id" -) - properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).merge( - recommendations_measures_pivot, how="left", left_on="id", right_on="property_id" + 
recommendations_measures_pivot, how="left", on="property_id" ) -properties_to_recs["estimated_cost"] = properties_to_recs["estimated_cost"].fillna(0) -sustainability_data["has_cavity_insulation"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] ) -sustainability_data["has_iwi"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["Internal", "FilledCavityPlusInternal"] ) -sustainability_data["has_ewi"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["External", "FilledCavityPlusExternal"] ) -sustainability_data["has_loft_insulation"] = sustainability_data["Roof Insulation"].isin( +sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250"] ) -sustainability_data["has_glazing"] = sustainability_data["Glazing"].isin( +sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( + ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] +) +sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] ) -sustainability_data["has_floor_insulation"] = sustainability_data["Floor Insulation"].isin( +sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( ["RetroFitted"] ) -sustainability_data["has_efficient_boiler"] = ( +sustainability_data["boiler_upgrade"] = ( sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) ) -sustainability_data["has_ashp"] = (sustainability_data["Heating"].isin(["Heat 
pumps (wet)"])) +sustainability_data["air_source_heat_pump"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) -sustainability_data["has_top_heat_controls"] = ( +sustainability_data["time_temperature_zone_control"] = ( sustainability_data["Controls Adequacy"].isin(["Top Spec"]) ) -sustainability_data["has_optimal_heat_controls"] = ( +sustainability_data["roomstat_programmer_trvs"] = ( sustainability_data["Controls Adequacy"].isin(["Optimal"]) ) -sustainability_data["has_flat_roof_insulation"] = ( +sustainability_data["flat_roof_insulation"] = ( (sustainability_data["Roof Construction"] == "Flat") & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) ) @@ -218,22 +217,23 @@ comparison = sustainability_data.merge( ], left_on="UPRN", right_on="uprn", - how="left" + how="left", + suffixes=("", "_from_recs") ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ comparison["conflict_cavity_wall_insulation"] = ( - (comparison["has_cavity_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation"]) == False) + (comparison["cavity_wall_insulation"]) & + (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) ) comparison["conflict_iwi_wall_insulation"] = ( - (comparison["has_iwi"]) & - (pd.isnull(comparison["internal_wall_insulation"]) == False) + (comparison["internal_wall_insulation"]) & + (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) ) comparison["conflict_ewi_wall_insulation"] = ( - (comparison["has_ewi"]) & - (pd.isnull(comparison["internal_wall_insulation"]) == False) + (comparison["external_wall_insulation"]) & + (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) ) cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] @@ -241,59 +241,66 @@ iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] ewi_conflicting = 
comparison[comparison["conflict_ewi_wall_insulation"] == True] # ------------ Roof ------------ -comparison["conflict_roof_insulation"] = ( - (comparison["has_loft_insulation"]) & - (pd.isnull(comparison["loft_insulation"]) == False) +comparison["conflict_loft_insulation"] = ( + (comparison["loft_insulation"]) & + (pd.isnull(comparison["loft_insulation_from_recs"]) == False) ) -loft_conflicting = comparison[comparison["conflict_roof_insulation"] == True] +loft_conflicting = comparison[comparison["conflict_loft_insulation"] == True] # ------------ Windows ------------ comparison["conflict_double_glazing"] = ( - (comparison["has_glazing"]) & + (comparison["double_glazing"]) & ( - (pd.isnull(comparison["double_glazing"]) == False) | (pd.isnull(comparison["secondary_glazing"]) == False) + (pd.isnull(comparison["double_glazing_from_recs"]) == False) ) ) -windows_conflicting = comparison[comparison["conflict_double_glazing"] == True] +comparison["conflict_secondary_glazing"] = ( + (comparison["secondary_glazing"]) & + ( + (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) + ) +) +double_glazing_conflicting = comparison[comparison["conflict_double_glazing"] == True] +secondary_glazing_conflicting = comparison[comparison["conflict_secondary_glazing"] == True] # ------------ Floors ------------ comparison["conflict_suspended_floor_insulation"] = ( - (comparison["has_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation"]) == False) + (comparison["suspended_floor_insulation"]) & + (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) ) floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] # ------------ Boiler Upgrade ------------ comparison["conflict_boiler_upgrade"] = ( - (comparison["has_efficient_boiler"]) & - (pd.isnull(comparison["boiler_upgrade"]) == False) + (comparison["boiler_upgrade"]) & + (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) ) boiler_conflicting = 
comparison[comparison["conflict_boiler_upgrade"] == True] # ------------ ASHP ------------ comparison["conflict_air_source_heat_pump"] = ( - (comparison["has_ashp"]) & - (pd.isnull(comparison["air_source_heat_pump"]) == False) + (comparison["air_source_heat_pump"]) & + (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) ) ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] # ------------ heat controls ------------ comparison["conflict_time_temperature_zone_control"] = ( - (comparison["has_top_heat_controls"]) & - (pd.isnull(comparison["time_temperature_zone_control"]) == False) + (comparison["time_temperature_zone_control"]) & + (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) ) comparison["conflict_roomstat_programmer_trvs"] = ( - (comparison["has_optimal_heat_controls"]) & - (pd.isnull(comparison["roomstat_programmer_trvs"]) == False) + (comparison["roomstat_programmer_trvs"]) & + (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) ) ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] # ------------ Flat Roof Insulation ----------- comparison["conflict_flat_roof_insulation"] = ( - (comparison["has_flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation"]) == False) + (comparison["flat_roof_insulation"]) & + (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) ) flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] @@ -304,7 +311,8 @@ all_conflicts = pd.concat( iwi_conflicting, ewi_conflicting, loft_conflicting, - windows_conflicting, + double_glazing_conflicting, + secondary_glazing_conflicting, floors_conflicting, boiler_conflicting, ashp_conflicting, @@ -314,8 +322,100 @@ all_conflicts = pd.concat( ] ) -all_conflicts["UPRN"].nunique() +all_conflicts = all_conflicts[ + [ + "uprn", + 
'conflict_cavity_wall_insulation', + 'conflict_iwi_wall_insulation', + 'conflict_ewi_wall_insulation', + 'conflict_loft_insulation', + 'conflict_double_glazing', + 'conflict_secondary_glazing', + 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', + 'conflict_air_source_heat_pump', + 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] +] +all_conflicts = all_conflicts.rename( + columns={ + "conflict_cavity_wall_insulation": "cavity_wall_insulation", + "conflict_iwi_wall_insulation": "internal_wall_insulation", + "conflict_ewi_wall_insulation": "external_wall_insulation", + "conflict_loft_insulation": "loft_insulation", + "conflict_double_glazing": "double_glazing", + "conflict_secondary_glazing": "secondary_glazing", + "conflict_suspended_floor_insulation": "suspended_floor_insulation", + "conflict_boiler_upgrade": "boiler_upgrade", + "conflict_air_source_heat_pump": "air_source_heat_pump", + "conflict_time_temperature_zone_control": "time_temperature_zone_control", + "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", + "conflict_flat_roof_insulation": "flat_roof_insulation" + + } +) + +# Reshape by UPRN by melting +all_conflicts = all_conflicts.melt( + id_vars=["uprn"], + var_name="measure_type", + value_name="already_installed" +) + +recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) + +recs_with_uprn = recommendations_df.merge( + properties_df[["property_id", "uprn"]], + on="property_id", + how="left", + suffixes=("", "_prop") +) + +recs_with_uprn = ( + recs_with_uprn + .sort_values("sap_points", ascending=False) + .groupby(["uprn", "measure_type"], as_index=False) + .first() +) + +recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) + +installed_measures_df = all_conflicts.merge( + recs_with_uprn[["uprn", "measure_type", "sap_points", 
"heat_demand", "kwh_savings", "co2_equivalent_savings", + "energy_cost_savings"]], + how="left", + on=["uprn", "measure_type"] +) + +installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] + +for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: + print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) + +# Do some calcs on SAP impact +sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() +properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap["uprn"] = properties_sap["uprn"].astype(str) + +old_sap_vs_new = properties_sap.merge( + sap_impact, how="inner", on="uprn" +) +old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) +) +# How many properties go from below C to above +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +changed = old_sap_vs_new[ + (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) + ] +properties_df[properties_df["current_sap_points"] < 69].shape + +old_sap_vs_new[old_sap_vs_new["current_epc_rating"].isin(["Epc.F", "Epc.G"])] + +25979 - 3891 + +sustainability_data[sustainability_data["UPRN"] == "100021204260"] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. 
We can do this but just looking @@ -330,12 +430,26 @@ all_conflicts["UPRN"].nunique() ### Rebaselining -def get_installed_sap_adjustments_by_uprn_for_portfolio( +from typing import Dict +from sqlalchemy import func + + +def get_installed_measure_adjustments_by_uprn_for_portfolio( session, portfolio_id: int, -) -> Dict[int, float]: +) -> Dict[int, dict]: """ - Returns { uprn -> total_sap_delta } + Returns per-UPRN installed-measure adjustments. + + { + uprn: { + sap_points: float, + co2: float, + energy_kwh: float, + energy_bill: float, + heat_demand: float, + } + } """ uprn_subquery = ( @@ -347,8 +461,22 @@ def get_installed_sap_adjustments_by_uprn_for_portfolio( rows = ( session.query( - InstalledMeasure.uprn, - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0), + InstalledMeasure.uprn.label("uprn"), + + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) + .label("sap_points"), + + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) + .label("co2"), + + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) + .label("energy_kwh"), + + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) + .label("energy_bill"), + + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) + .label("heat_demand"), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.uprn.in_(uprn_subquery)) @@ -356,7 +484,16 @@ def get_installed_sap_adjustments_by_uprn_for_portfolio( .all() ) - return {uprn: float(delta) for uprn, delta in rows} + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } def get_installed_measure_types_by_uprn( @@ -608,6 +745,62 @@ def persist_property_sap_updates( print(f"✅ Updated {len(properties)} properties") +def compute_epc_rebasing_updates( + epcs: Dict[int, PropertyDetailsEpcModel], + properties_by_id: Dict[int, 
PropertyModel], + installed_adjustments_by_uprn: Dict[int, dict], +) -> Dict[int, dict]: + """ + Computes EPC rebasing updates without mutating DB objects. + Keyed by property_id. + """ + + updates: Dict[int, dict] = {} + + for property_id, epc in epcs.items(): + prop = properties_by_id.get(property_id) + if not prop or prop.uprn is None: + continue + + adj = installed_adjustments_by_uprn.get(prop.uprn) + if not adj: + continue + + updates[property_id] = { + "property_id": property_id, + + # Originals (only set once) + "original_co2_emissions": ( + epc.original_co2_emissions + if epc.original_co2_emissions is not None + else epc.co2_emissions + ), + "original_primary_energy_consumption": ( + epc.original_primary_energy_consumption + if epc.original_primary_energy_consumption is not None + else epc.primary_energy_consumption + ), + "original_current_energy_demand": ( + epc.original_current_energy_demand + if epc.original_current_energy_demand is not None + else epc.current_energy_demand + ), + "original_current_energy_demand_heating_hotwater": ( + epc.original_current_energy_demand_heating_hotwater + if epc.original_current_energy_demand_heating_hotwater is not None + else epc.current_energy_demand_heating_hotwater + ), + + # Adjustments (always re-applied from originals) + "installed_measures_co2_adjustment": adj["co2"], + "installed_measures_energy_demand_adjustment": adj["energy_kwh"], + "installed_measures_total_energy_bill_adjustment": adj["energy_bill"], + "installed_measures_heat_demand_adjustment": adj["heat_demand"], + } + + return updates + + def persist_plan_updates(plan_updates: list[dict]): """ Writes recalculated plan metrics. @@ -654,6 +847,74 @@ def persist_plan_updates(plan_updates: list[dict]): print(f"✅ Updated {len(plans)} plans") +def persist_epc_rebasing_updates( + epc_updates_by_property_id: Dict[int, dict], +): + """ + Overwrites EPC metrics using installed-measure rebasing. + Safe to re-run. 
+ """ + + with db_session() as session: + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + PropertyDetailsEpcModel.property_id.in_( + epc_updates_by_property_id.keys() + ) + ) + .all() + ) + + for epc in epcs: + u = epc_updates_by_property_id[epc.property_id] + + # Store originals once + epc.original_co2_emissions = u["original_co2_emissions"] + epc.original_primary_energy_consumption = ( + u["original_primary_energy_consumption"] + ) + epc.original_current_energy_demand = ( + u["original_current_energy_demand"] + ) + epc.original_current_energy_demand_heating_hotwater = ( + u["original_current_energy_demand_heating_hotwater"] + ) + + # Apply rebased values + epc.co2_emissions = ( + u["original_co2_emissions"] + - u["installed_measures_co2_adjustment"] + ) + + epc.primary_energy_consumption = ( + u["original_primary_energy_consumption"] + - u["installed_measures_heat_demand_adjustment"] + ) + + epc.current_energy_demand = ( + u["original_current_energy_demand"] + - u["installed_measures_energy_demand_adjustment"] + ) + + # Flags + audit fields + epc.installed_measures_co2_adjustment = ( + u["installed_measures_co2_adjustment"] + ) + epc.installed_measures_energy_demand_adjustment = ( + u["installed_measures_energy_demand_adjustment"] + ) + epc.installed_measures_total_energy_bill_adjustment = ( + u["installed_measures_total_energy_bill_adjustment"] + ) + epc.installed_measures_heat_demand_adjustment = ( + u["installed_measures_heat_demand_adjustment"] + ) + epc.is_epc_adjusted_for_installed_measures = True + + print(f"✅ Updated {len(epcs)} EPC records") + + # ------------------------------------------------------------ # EXECUTION (DRY RUN) # ------------------------------------------------------------ @@ -684,22 +945,29 @@ with db_read_session() as session: ) } - sap_adjustments = get_installed_sap_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, + installed_adjustments = ( + get_installed_measure_adjustments_by_uprn_for_portfolio( + 
session, + PORTFOLIO_ID, + ) ) property_updates = compute_property_sap_updates( properties, - sap_adjustments, + {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) + properties_by_id = {p.id: p for p in properties} property_updates_by_id = { u["property_id"]: u for u in property_updates } - properties_by_id = {p.id: p for p in properties} + epc_updates = compute_epc_rebasing_updates( + epcs, + properties_by_id, + installed_adjustments, + ) plan_updates = compute_plan_updates( session, @@ -712,3 +980,4 @@ with db_read_session() as session: # When ready to run! persist_property_sap_updates(property_updates_by_id) persist_plan_updates(plan_updates) +persist_epc_rebasing_updates(epc_updates) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py new file mode 100644 index 00000000..57b858ce --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -0,0 +1,156 @@ +import pandas as pd +from sqlalchemy.orm import Session +from sqlalchemy import text, select +from backend.app.db.connection import db_read_session +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import Plan + +PORTFOLIO_ID = 431 + +with db_read_session() as session: + # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 + estimated_epcs = session.query(PropertyDetailsEpcModel).filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + ) + ).all() + + # Get the ids + estimated_epc_ids = [epc.property_id for epc in estimated_epcs] + +# I want to get the UPRNS for these properties, from the property model +with db_read_session() as session: + estimated_uprns = 
session.query(PropertyModel.uprn).filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) + ) + ).all() + + estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] + +# Go the the SAL +sal_1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) +sal_2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx", + sheet_name="Standardised Asset List" +) + +sal = pd.concat([sal_1, sal_2]) +sal = sal.drop_duplicates(subset=['epc_os_uprn']) + +estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() + +SCENARIOS = [ + 871, # EPC C - fabric first, no solid floor, ashp 3.0 + 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 862, # EPC B - No solid floor, ASHP COP 3.0 + 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 +] + +# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids +with db_read_session() as session: + result = session.execute( + select(Plan.id, Plan.property_id) + .where(Plan.property_id.in_(estimated_epc_ids)) + ) + plans = [ + { + "plan_id": row.id, + "property_id": row.property_id, + } for row in result + ] + +df = pd.DataFrame(plans) +df = df.sort_values("property_id", ascending=True) + +agg = df.groupby("property_id").size().reset_index(name="n_plans") +agg = agg.sort_values("n_plans", ascending=True) +agg[agg["n_plans"] != 1] +assert all(agg["n_plans"] == 1) + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # 
---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +# Store the SAL +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx") + +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Top 1000 for testing + sal.iloc[0:1000, :].to_excel(writer, sheet_name="batch 1", index=False) + # Batch 2 is the next 20,000 + sal.iloc[1000:21000, :].to_excel(writer, sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + sal.iloc[21000:41000, :].to_excel(writer, sheet_name="batch 3", index=False) + + sal.iloc[41000:61000, :].to_excel(writer, sheet_name="batch 4", index=False) + sal.iloc[61000:81000, :].to_excel(writer, sheet_name="batch 5", index=False) + sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 5", index=False) From d70f7ebf73b4769b47d18c28b04a80b1868ec7f5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 12:31:56 +0000 Subject: [PATCH 
161/202] fixed typo --- backend/engine/engine.py | 2 +- .../Nov 2025 Consulting Project/h_reset_estimated_epcs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 9a9c30a2..3ece966b 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -542,7 +542,7 @@ async def model_engine(body: PlanTriggerRequest): sheet_name=body.sheet_name, header_row=0, ) - logger.into("Got the plan input from excel") + logger.info("Got the plan input from excel") # We now handle the case where the input data is a Domna standardised assset list if body.file_format == "domna_asset_list": diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 57b858ce..65843ed3 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -75,8 +75,8 @@ df = df.sort_values("property_id", ascending=True) agg = df.groupby("property_id").size().reset_index(name="n_plans") agg = agg.sort_values("n_plans", ascending=True) -agg[agg["n_plans"] != 1] -assert all(agg["n_plans"] == 1) +agg[agg["n_plans"] != 2] +assert all(agg["n_plans"] == 2) def delete_plan_batch(session: Session, plan_ids: list[int]): From 82452150fc45054eb25d312fcd9b40f21fa65508 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 12:57:00 +0000 Subject: [PATCH 162/202] try bump mem up to 4gb --- backend/engine/engine.py | 3 ++- .../Nov 2025 Consulting Project/h_reset_estimated_epcs.py | 4 ++-- serverless.yml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 3ece966b..4f5ee3c1 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -703,7 +703,8 @@ async def model_engine(body: PlanTriggerRequest): 
property_lookup[("uprn", uprn)] = prop_id if landlord_property_id: property_lookup[("landlord_property_id", landlord_property_id)] = prop_id - + + logger.info("Processing each property for model input preparation") input_properties, inspections_map, eco_packages, epc_upserts = [], {}, {}, [] for addr, config in tqdm( zip(addresses, plan_input), diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 65843ed3..83799eff 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -75,8 +75,8 @@ df = df.sort_values("property_id", ascending=True) agg = df.groupby("property_id").size().reset_index(name="n_plans") agg = agg.sort_values("n_plans", ascending=True) -agg[agg["n_plans"] != 2] -assert all(agg["n_plans"] == 2) +agg[agg["n_plans"] != 3] +assert all(agg["n_plans"] == 3) def delete_plan_batch(session: Session, plan_ids: list[int]): diff --git a/serverless.yml b/serverless.yml index f3def028..9e57ca95 100644 --- a/serverless.yml +++ b/serverless.yml @@ -60,7 +60,7 @@ functions: image: uri: ${env:ECR_URI}:${env:GITHUB_SHA} timeout: 900 - memorySize: 3008 + memorySize: 4096 role: EngineLambdaRole events: - sqs: From 1d81db508df9dc3ef14d59f659900da889f7feb0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 13:13:41 +0000 Subject: [PATCH 163/202] testing moving runtime to function level --- serverless.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/serverless.yml b/serverless.yml index 9e57ca95..8ce5347f 100644 --- a/serverless.yml +++ b/serverless.yml @@ -3,7 +3,6 @@ service: retrofit-platform provider: name: aws region: eu-west-2 - runtime: python3.11 architecture: x86_64 environment: API_KEY: ${env:API_KEY} @@ -46,7 +45,9 @@ custom: functions: + # ZIP-based Lambda (FastAPI) 
fastapi-backend: + runtime: python3.11 handler: backend.app.main.handler timeout: 600 memorySize: 512 @@ -56,6 +57,7 @@ functions: path: /{proxy+} method: ANY + # TRUE container-image Lambda (4 GB+ supported) model-engine-lambda: image: uri: ${env:ECR_URI}:${env:GITHUB_SHA} @@ -66,16 +68,16 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 12 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits - + maximumConcurrency: 12 resources: Resources: + EngineQueue: Type: AWS::SQS::Queue Properties: QueueName: model-engine-queue - VisibilityTimeout: 910 # must be >= lambda timeout (900) + VisibilityTimeout: 910 FastApiLambdaRole: Type: AWS::IAM::Role From 15ca3fa571ea3e14cd1cfb7e3ad6d218bde627b2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 13:20:41 +0000 Subject: [PATCH 164/202] specify runtime in sls --- serverless.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/serverless.yml b/serverless.yml index 8ce5347f..a8e64107 100644 --- a/serverless.yml +++ b/serverless.yml @@ -35,9 +35,11 @@ plugins: - serverless-domain-manager custom: - pythonRequirements: - fileName: backend/app/requirements/requirements.txt - dockerizePip: true + custom: + pythonRequirements: + fileName: backend/app/requirements/requirements.txt + dockerizePip: true + pythonBin: python3.11 customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true From 8439600463449a3b6195d92464d549a19b2eedbc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 13:33:00 +0000 Subject: [PATCH 165/202] another try on sls --- serverless.yml | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/serverless.yml b/serverless.yml index a8e64107..c27c0658 100644 --- a/serverless.yml +++ b/serverless.yml @@ -1,9 +1,18 @@ service: retrofit-platform +frameworkVersion: '3' + 
provider: name: aws region: eu-west-2 architecture: x86_64 + + # REQUIRED for TRUE image Lambdas in Serverless v3 + ecr: + images: + modelEngineImage: + uri: ${env:ECR_URI}:${env:GITHUB_SHA} + environment: API_KEY: ${env:API_KEY} ENVIRONMENT: ${env:ENVIRONMENT} @@ -18,8 +27,6 @@ provider: DB_USERNAME: ${env:DB_USERNAME} DB_PASSWORD: ${env:DB_PASSWORD} DB_PORT: ${env:DB_PORT} - ECR_URI: ${env:ECR_URI} - GITHUB_SHA: ${env:GITHUB_SHA} SAP_PREDICTIONS_BUCKET: ${env:SAP_PREDICTIONS_BUCKET} CARBON_PREDICTIONS_BUCKET: ${env:CARBON_PREDICTIONS_BUCKET} HEAT_PREDICTIONS_BUCKET: ${env:HEAT_PREDICTIONS_BUCKET} @@ -35,11 +42,11 @@ plugins: - serverless-domain-manager custom: - custom: - pythonRequirements: - fileName: backend/app/requirements/requirements.txt - dockerizePip: true - pythonBin: python3.11 + pythonRequirements: + fileName: backend/app/requirements/requirements.txt + dockerizePip: true + pythonBin: python3.11 + customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true @@ -47,7 +54,9 @@ custom: functions: - # ZIP-based Lambda (FastAPI) + # ───────────────────────────────────────────── + # ZIP-BASED FASTAPI LAMBDA + # ───────────────────────────────────────────── fastapi-backend: runtime: python3.11 handler: backend.app.main.handler @@ -59,10 +68,12 @@ functions: path: /{proxy+} method: ANY - # TRUE container-image Lambda (4 GB+ supported) + # ───────────────────────────────────────────── + # TRUE CONTAINER-IMAGE LAMBDA (4GB+ SUPPORTED) + # ───────────────────────────────────────────── model-engine-lambda: image: - uri: ${env:ECR_URI}:${env:GITHUB_SHA} + name: modelEngineImage timeout: 900 memorySize: 4096 role: EngineLambdaRole From ef350df6d8c1f42218a42ed0eb61a2f66627872d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 13:40:48 +0000 Subject: [PATCH 166/202] reverted --- serverless.yml | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/serverless.yml 
b/serverless.yml index c27c0658..f3def028 100644 --- a/serverless.yml +++ b/serverless.yml @@ -1,18 +1,10 @@ service: retrofit-platform -frameworkVersion: '3' - provider: name: aws region: eu-west-2 + runtime: python3.11 architecture: x86_64 - - # REQUIRED for TRUE image Lambdas in Serverless v3 - ecr: - images: - modelEngineImage: - uri: ${env:ECR_URI}:${env:GITHUB_SHA} - environment: API_KEY: ${env:API_KEY} ENVIRONMENT: ${env:ENVIRONMENT} @@ -27,6 +19,8 @@ provider: DB_USERNAME: ${env:DB_USERNAME} DB_PASSWORD: ${env:DB_PASSWORD} DB_PORT: ${env:DB_PORT} + ECR_URI: ${env:ECR_URI} + GITHUB_SHA: ${env:GITHUB_SHA} SAP_PREDICTIONS_BUCKET: ${env:SAP_PREDICTIONS_BUCKET} CARBON_PREDICTIONS_BUCKET: ${env:CARBON_PREDICTIONS_BUCKET} HEAT_PREDICTIONS_BUCKET: ${env:HEAT_PREDICTIONS_BUCKET} @@ -45,8 +39,6 @@ custom: pythonRequirements: fileName: backend/app/requirements/requirements.txt dockerizePip: true - pythonBin: python3.11 - customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true @@ -54,11 +46,7 @@ custom: functions: - # ───────────────────────────────────────────── - # ZIP-BASED FASTAPI LAMBDA - # ───────────────────────────────────────────── fastapi-backend: - runtime: python3.11 handler: backend.app.main.handler timeout: 600 memorySize: 512 @@ -68,29 +56,26 @@ functions: path: /{proxy+} method: ANY - # ───────────────────────────────────────────── - # TRUE CONTAINER-IMAGE LAMBDA (4GB+ SUPPORTED) - # ───────────────────────────────────────────── model-engine-lambda: image: - name: modelEngineImage + uri: ${env:ECR_URI}:${env:GITHUB_SHA} timeout: 900 - memorySize: 4096 + memorySize: 3008 role: EngineLambdaRole events: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 12 + maximumConcurrency: 12 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + resources: Resources: - EngineQueue: Type: AWS::SQS::Queue Properties: QueueName: 
model-engine-queue - VisibilityTimeout: 910 + VisibilityTimeout: 910 # must be >= lambda timeout (900) FastApiLambdaRole: Type: AWS::IAM::Role From ef942ef18ab691217f489ac8a4dd0cedc4e0c05f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 11:40:49 +0000 Subject: [PATCH 167/202] allow double glazing if no restrictions but confirmed performance of secondary glazing --- backend/app/db/functions/__init__.py | 1 + .../functions/already_installed_functions.py | 40 + .../db/functions/recommendations_functions.py | 3 +- backend/app/plan/data_classes.py | 1 - backend/app/plan/utils.py | 7 +- backend/engine/engine.py | 15 +- backend/onboarders/mappings/age_band.py | 14 + backend/onboarders/mappings/built_form.py | 15 + backend/onboarders/mappings/property_type.py | 6 + backend/onboarders/mappings/walls.py | 3 + backend/onboarders/parity.py | 95 ++ .../g_rebaselining_installed_measrues.py | 1020 ++++++++++++++--- .../h_reset_estimated_epcs.py | 56 +- .../i_testing_parity_data.py | 21 + .../j_installed_measures.py | 7 + recommendations/WindowsRecommendations.py | 10 +- 16 files changed, 1147 insertions(+), 167 deletions(-) create mode 100644 backend/app/db/functions/already_installed_functions.py create mode 100644 backend/onboarders/mappings/age_band.py create mode 100644 backend/onboarders/mappings/built_form.py create mode 100644 backend/onboarders/mappings/property_type.py create mode 100644 backend/onboarders/mappings/walls.py create mode 100644 backend/onboarders/parity.py create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py diff --git a/backend/app/db/functions/__init__.py b/backend/app/db/functions/__init__.py index 0f239d6e..8e7495bf 100644 --- a/backend/app/db/functions/__init__.py +++ b/backend/app/db/functions/__init__.py @@ -10,3 +10,4 @@ from .materials_functions import * from .inspections_functions import 
* from .non_intrusive_surveys import * from .whlg_functions import * +from .already_installed_functions import * diff --git a/backend/app/db/functions/already_installed_functions.py b/backend/app/db/functions/already_installed_functions.py new file mode 100644 index 00000000..351419b0 --- /dev/null +++ b/backend/app/db/functions/already_installed_functions.py @@ -0,0 +1,40 @@ +from backend.app.db.models.recommendations import InstalledMeasure +from typing import Dict, List, Set +from collections import defaultdict + + +def get_installed_measure_types_by_uprns( + session, + uprns: List[int], +) -> Dict[int, Set[str]]: + """ + Returns installed measure types per UPRN. + + { + uprn: {"cavity_wall_insulation", "mechanical_ventilation", ...} + } + """ + + if not uprns: + return {} + + rows = ( + session.query( + InstalledMeasure.uprn, + InstalledMeasure.measure_type, + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.uprn.in_(uprns)) + .all() + ) + + out: Dict[int, Set[str]] = defaultdict(set) + + for uprn, measure_type in rows: + out[uprn].add( + measure_type.value + if hasattr(measure_type, "value") + else measure_type + ) + + return out diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index ae178c8a..4fdd9324 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -27,7 +27,8 @@ def prepare_plan_data( """ # Plan carbon savings co2_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) - post_co2_emissions = p.data["co2-emissions-current"] - co2_savings + raise Exception("CHECK ME") + post_co2_emissions = p.energy["co2_emissions"] - co2_savings # Plan bill savings energy_bill_savings = sum([r["energy_cost_savings"] for r in default_recommendations]) diff --git a/backend/app/plan/data_classes.py b/backend/app/plan/data_classes.py index cec5ed11..99f6156b 100644 --- 
a/backend/app/plan/data_classes.py +++ b/backend/app/plan/data_classes.py @@ -5,6 +5,5 @@ from typing import Any, Optional @dataclass class PropertyRequestData: patch: dict - already_installed: list non_invasive_recommendations: dict valuation: Optional[float] diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 52e2b0c4..33f391d4 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -52,7 +52,7 @@ def patch_epc(patch, epc_records): def extract_property_request_data( - address: Address, patches, already_installed, non_invasive_recommendations, valuation_data, uprn + address: Address, patches, non_invasive_recommendations, valuation_data, uprn ): patch_has_uprn = "uprn" in patches[0] if patches else True if patch_has_uprn: @@ -64,10 +64,6 @@ def extract_property_request_data( x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode) ), {}) - property_already_installed = next(( - x for x in already_installed if (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), []) - # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False @@ -119,7 +115,6 @@ def extract_property_request_data( # Return data class to give a structured format return PropertyRequestData( patch=patch, - already_installed=property_already_installed, non_invasive_recommendations=property_non_invasive_recommendations, valuation=property_valuation ) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 4f5ee3c1..f4e3ad3f 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -684,6 +684,9 @@ async def model_engine(body: PlanTriggerRequest): energy_assessments_by_uprn = db_funcs.energy_assessment_functions.get_latest_assessments_for_uprns( session, uprns ) + already_installed_by_uprn = 
db_funcs.already_installed_functions.get_installed_measure_types_by_uprns( + session, uprns + ) # If we have properties that need to be created, we cerate them in bulk logger.info("Determine new properties to be created") @@ -703,7 +706,7 @@ async def model_engine(body: PlanTriggerRequest): property_lookup[("uprn", uprn)] = prop_id if landlord_property_id: property_lookup[("landlord_property_id", landlord_property_id)] = prop_id - + logger.info("Processing each property for model input preparation") input_properties, inspections_map, eco_packages, epc_upserts = [], {}, {}, [] for addr, config in tqdm( @@ -725,6 +728,8 @@ async def model_engine(body: PlanTriggerRequest): energy_assessment = energy_assessments_by_uprn.get(addr.uprn) + property_already_installed = list(already_installed_by_uprn[addr.uprn]) + epc_searcher = SearchEpc( address1=addr.address1, postcode=addr.postcode, @@ -767,7 +772,6 @@ async def model_engine(body: PlanTriggerRequest): req_data = extract_property_request_data( address=addr, patches=patches, - already_installed=already_installed, non_invasive_recommendations=non_invasive_recommendations, valuation_data=valuation_data, uprn=addr.uprn, @@ -813,7 +817,7 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=req_data.already_installed + eco_packages.get(property_id)[3], + already_installed=property_already_installed + eco_packages.get(property_id)[3], property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, @@ -965,6 +969,8 @@ async def model_engine(body: PlanTriggerRequest): # Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True + recommendations_scoring_data["sap_starting"] = 77 + recommendations_scoring_data = recommendations_scoring_data.drop( columns=["rdsap_change", "heat_demand_change", "carbon_change", 
"sap_ending", "heat_demand_ending", "carbon_ending"] @@ -1189,7 +1195,8 @@ async def model_engine(body: PlanTriggerRequest): property_updates, property_epc_details, property_spatial_updates = [], [], [] plans_to_create, recommendations_to_create = [], [] - + # TODO: Check the update to carbon + print("NEED TO CHECK THE UPDATE TO CARBON") # Prepare the data that will need to be uploaded in bulk for p in input_properties: recommendations_for_property = recommendations.get(p.id, []) diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py new file mode 100644 index 00000000..2487c921 --- /dev/null +++ b/backend/onboarders/mappings/age_band.py @@ -0,0 +1,14 @@ +party_map = { + "Before 1900": 'England and Wales: before 1900', + "1900-1929": 'England and Wales: 1900-1929', + "1930-1949": 'England and Wales: 1930-1949', + "1950-1966": 'England and Wales: 1950-1966', + "1967-1975": 'England and Wales: 1967-1975', + "1976-1982": 'England and Wales: 1976-1982', + "1983-1990": 'England and Wales: 1983-1990', + "1991-1995": 'England and Wales: 1991-1995', + "1996-2002": 'England and Wales: 1996-2002', + "2003-2006": 'England and Wales: 2003-2006', + "2007-2011": 'England and Wales: 2007-2011', + "2012 onwards": 'England and Wales: 2012-2021', +} diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/built_form.py new file mode 100644 index 00000000..23901fc6 --- /dev/null +++ b/backend/onboarders/mappings/built_form.py @@ -0,0 +1,15 @@ +parity_map = { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "Detached": "Detached", + "SemiDetached": "Semi-Detached", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + "EnclosedEndTerrace": "Enclosed End-Terrace", +} + +# MidTerrace 41462 +# EndTerrace 20910 +# Detached 16875 +# SemiDetached 14725 +# EnclosedMidTerrace 3176 +# EnclosedEndTerrace 2393 diff --git a/backend/onboarders/mappings/property_type.py b/backend/onboarders/mappings/property_type.py new 
file mode 100644 index 00000000..75deef04 --- /dev/null +++ b/backend/onboarders/mappings/property_type.py @@ -0,0 +1,6 @@ +parity_map = { + "Flat": "Flat", + "Maisonette": "Maisonette", + "Bungalow": "Bungalow", + "House": "House", +} diff --git a/backend/onboarders/mappings/walls.py b/backend/onboarders/mappings/walls.py new file mode 100644 index 00000000..9b70b49c --- /dev/null +++ b/backend/onboarders/mappings/walls.py @@ -0,0 +1,3 @@ +parity_map = { + +} diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py new file mode 100644 index 00000000..f41ebeaf --- /dev/null +++ b/backend/onboarders/parity.py @@ -0,0 +1,95 @@ +import pandas as pd +from etl.epc.DataProcessor import construction_age_bounds_map +from backend.onboarders.mappings.property_type import parity_map as property_map +from backend.onboarders.mappings.age_band import party_map as age_band_map +from backend.onboarders.mappings.built_form import parity_map as built_form_map + + +def check_nulls(data, original_column, mapped_column): + # We only allow nulls if the oroginal value was null + null_vals = data[pd.isnull(data[mapped_column])] + if null_vals.empty: + return True + # We make sure all original values were null + assert pd.isnull(null_vals[original_column]).all(), ( + f"Some values in {mapped_column} were not mapped, but original values were not null" + ) + + +# Sample input data + +data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# We want to map the parity fields to standard EPC references. 
This will allow us to +# 1) Estimate EPCs, more accurately +# 2) Patch incorrect EPCs with ease +# 3) Indicate already installed measures + +# ------------ construction_age_band ------------ +# Map to EPC age bands +# def construction_date_to_band(year): +# if pd.isnull(year): +# return None +# # Get the year from the date which is numpy datetime format +# for label, ranges in construction_age_bounds_map.items(): +# if ranges["l"] <= year <= ranges["u"]: +# return label +# raise NotImplementedError("year out of bounds") +# +# +# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) + +data["construction_age_band"] = data["Construction Years"].map(age_band_map) + +check_nulls(data, "Construction Years", "construction_age_band") + +# ------------ property_type ------------ +data["property_type"] = data["Type"].map(property_map) + +assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" + +# ------------ built_form ------------ +data["built_form"] = data["Attachment"].map(built_form_map) + +assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" + +# ------------ Wall Construction ------------ + +data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") + +data["Wall Insulation"].value_counts() +data["Wall Construction"].value_counts() + +as_built_map = { + "Cavity": {"insulated_age_bands":[], "partial_insulated_age_bands": []}, + "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, +} + 
+def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): + if wall_insulation == "AsBuilt": + # Deduce based on wall construction and age band + bands = as_built_map.get(wall_constuction, None) + if bands is None: + raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") + + # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated + + + + +# Variables we want to map +'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', + 'Attachment', 'Construction Years', 'Wall Construction', + 'Wall Insulation', 'Roof Construction', 'Roof Insulation', + 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', + 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', + 'Total Floor Area (m2)' \ No newline at end of file diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index d310ffa4..8d4bc9da 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -1,10 +1,10 @@ import pandas as pd +from tqdm import tqdm from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session, db_session from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ InstalledMeasure from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from sqlalchemy import func from backend.app.utils import sap_to_epc from typing import Dict, List, Set from recommendations.Costs import Costs @@ -87,14 +87,9 @@ def get_all_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -PORTFOLIO_ID = 419 # Peabody +PORTFOLIO_ID = 431 # Peabody - new portfolio 
SCENARIOS = [ - # 871, # EPC C - fabric first, no solid floor, ashp 3.0 - # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - # 862, # EPC B - No solid floor, ASHP COP 3.0 - # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - # 859, # EPC C - no solid floor, ashp 3.0 - 885, # EPC B - fabric first, no solid floor, ashp 3.0 + 891, # EPC B - No solid floor, ASHP COP 3.0 ] # properties_data, plans_data, recommendations_data = get_all_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) @@ -106,31 +101,31 @@ SCENARIOS = [ # Save CSVs # properties_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" -# "f_peabody_properties_data_20260108.csv", +# "Final portfolio datasets/v2/peabody_properties_data_20260108.csv", # index=False # ) # plans_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" -# "f_peabody_plans_data_20260108.csv", +# "Final portfolio datasets/v2/peabody_plans_data_20260108.csv", # index=False # ) # recommendations_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" -# "f_peabody_recommendations_data_20260108.csv", +# "Final portfolio datasets/v2/peabody_recommendations_data_20260108.csv", # index=False # ) # Read csvs properties_df = pd.read_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" - "f_peabody_properties_data_20260108.csv" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/Final portfolio datasets/v2/peabody_properties_data_20260108.csv" ) plans_df = pd.read_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" - "f_peabody_plans_data_20260108.csv" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final portfolio " + "datasets/v2/peabody_plans_data_20260108.csv" ) recommendations_df 
= pd.read_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" - "f_peabody_recommendations_data_20260108.csv" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final portfolio " + "datasets/v2/peabody_recommendations_data_20260108.csv" ) sustainability_data = pd.read_excel( @@ -138,11 +133,29 @@ sustainability_data = pd.read_excel( "- Data Extracts for Domna.xlsx", sheet_name="Sustainability" ) +sustainability_data_with_sap = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx", +) -# recommendations_df = pd.read_excel( -# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/EPC B, " -# "No solid floor, ASHP COP 3.0.xlsx" -# ) +properties_df["uprn"] = properties_df["uprn"].astype(str) +property_data_comparison = properties_df.merge( + sustainability_data, how="inner", left_on="uprn", right_on="UPRN", suffixes=("_prop", "_sust") +) + +property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip() + +column_pairs = { + "built_form": "Attachment", + "property_type": "Type", + "wall_type": "Wall Construction", + "heating": "Heating", +} +combination_tables = {} + +for v1, v2 in column_pairs.items(): + df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count') + combination_tables[v1] = df # We just need all of the measure types, per property recommendation_measure_types = recommendations_df[ @@ -177,7 +190,7 @@ sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insu ["External", "FilledCavityPlusExternal"] ) sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( - ["mm300", "mm250"] + ["mm300", "mm250", "mm350", "mm400", "mm270"] ) sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( ["Double 2002 
or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] @@ -223,86 +236,81 @@ comparison = sustainability_data.merge( # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ -comparison["conflict_cavity_wall_insulation"] = ( +cwi_conflicting = comparison[ (comparison["cavity_wall_insulation"]) & (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) -) -comparison["conflict_iwi_wall_insulation"] = ( + ].copy() +cwi_conflicting["conflict_cavity_wall_insulation"] = True +iwi_conflicting = comparison[ (comparison["internal_wall_insulation"]) & (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) -) -comparison["conflict_ewi_wall_insulation"] = ( + ].copy() +iwi_conflicting["conflict_iwi_wall_insulation"] = True + +ewi_conflicting = comparison[ (comparison["external_wall_insulation"]) & (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) -) - -cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] -iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] -ewi_conflicting = comparison[comparison["conflict_ewi_wall_insulation"] == True] + ].copy() +ewi_conflicting["conflict_ewi_wall_insulation"] = True # ------------ Roof ------------ -comparison["conflict_loft_insulation"] = ( +loft_conflicting = comparison[ (comparison["loft_insulation"]) & (pd.isnull(comparison["loft_insulation_from_recs"]) == False) -) - -loft_conflicting = comparison[comparison["conflict_loft_insulation"] == True] + ].copy() +loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ -comparison["conflict_double_glazing"] = ( +double_glazing_conflicting = comparison[ (comparison["double_glazing"]) & - ( - (pd.isnull(comparison["double_glazing_from_recs"]) == False) - ) -) -comparison["conflict_secondary_glazing"] = ( + 
(pd.isnull(comparison["double_glazing_from_recs"]) == False) + ].copy() +double_glazing_conflicting["conflict_double_glazing"] = True +secondary_glazing_conflicting = comparison[ (comparison["secondary_glazing"]) & - ( - (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) - ) -) -double_glazing_conflicting = comparison[comparison["conflict_double_glazing"] == True] -secondary_glazing_conflicting = comparison[comparison["conflict_secondary_glazing"] == True] + (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) + ].copy() +secondary_glazing_conflicting["conflict_secondary_glazing"] = True # ------------ Floors ------------ -comparison["conflict_suspended_floor_insulation"] = ( +floors_conflicting = comparison[ (comparison["suspended_floor_insulation"]) & (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) -) -floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] + ].copy() +floors_conflicting["conflict_suspended_floor_insulation"] = True # ------------ Boiler Upgrade ------------ -comparison["conflict_boiler_upgrade"] = ( +boiler_conflicting = comparison[ (comparison["boiler_upgrade"]) & (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) -) -boiler_conflicting = comparison[comparison["conflict_boiler_upgrade"] == True] + ].copy() +boiler_conflicting["conflict_boiler_upgrade"] = True # ------------ ASHP ------------ -comparison["conflict_air_source_heat_pump"] = ( +ashp_conflicting = comparison[ (comparison["air_source_heat_pump"]) & (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) -) -ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] + ].copy() +ashp_conflicting["conflict_air_source_heat_pump"] = True # ------------ heat controls ------------ -comparison["conflict_time_temperature_zone_control"] = ( +ttzc_conflicting = comparison[ (comparison["time_temperature_zone_control"]) & 
(pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) -) -comparison["conflict_roomstat_programmer_trvs"] = ( + ].copy() +ttzc_conflicting["conflict_time_temperature_zone_control"] = True +rst_conflicting = comparison[ (comparison["roomstat_programmer_trvs"]) & (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) -) -ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] -rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] + ].copy() +rst_conflicting["conflict_roomstat_programmer_trvs"] = True # ------------ Flat Roof Insulation ----------- -comparison["conflict_flat_roof_insulation"] = ( +flat_roof_conflicting = comparison[ (comparison["flat_roof_insulation"]) & (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) -) -flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] + ].copy() +flat_roof_conflicting["conflict_flat_roof_insulation"] = True # All properties with conflicts all_conflicts = pd.concat( @@ -389,6 +397,61 @@ installed_measures_df = all_conflicts.merge( installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] +## --- Sense checking ---- + +FABRIC_MEASURES = { + "external_wall_insulation", + "internal_wall_insulation", + "cavity_wall_insulation", +} + + +def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn): + """ + If a property has fabric insulation installed, also mark + mechanical ventilation as installed using recommendation metrics. 
+ """ + + # Properties with fabric installed + fabric_uprns = installed_measures_df[ + installed_measures_df["measure_type"].isin(FABRIC_MEASURES) + ]["uprn"].unique() + + # Get MV recommendation metrics (pick max SAP per property as you decided) + mv_recs = ( + recs_with_uprn[ + (recs_with_uprn["measure_type"] == "mechanical_ventilation") + & (recs_with_uprn["uprn"].isin(fabric_uprns)) + ] + .sort_values("sap_points", ascending=False) + .drop_duplicates(subset=["uprn"]) + ) + + mv_installed = mv_recs[[ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ]].copy() + + mv_installed["already_installed"] = True + + return pd.concat( + [installed_measures_df, mv_installed], + ignore_index=True + ) + + +installed_measures_df = add_mechanical_ventilation_for_fabric( + installed_measures_df, + recs_with_uprn +) + +assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 + for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) @@ -401,21 +464,12 @@ old_sap_vs_new = properties_sap.merge( sap_impact, how="inner", on="uprn" ) old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] -old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( - lambda x: sap_to_epc(x) -) +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(lambda x: sap_to_epc(x)) # How many properties go from below C to above old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() changed = old_sap_vs_new[ (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) ] -properties_df[properties_df["current_sap_points"] < 69].shape - -old_sap_vs_new[old_sap_vs_new["current_epc_rating"].isin(["Epc.F", "Epc.G"])] - -25979 - 3891 - 
-sustainability_data[sustainability_data["UPRN"] == "100021204260"] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -426,32 +480,92 @@ sustainability_data[sustainability_data["UPRN"] == "100021204260"] # 3) For anything already installed, I should mark already installed as True, and set the cost to zero # 4) I need to update the plan cost to remove the cost of the installed measures +# TODO: +# 1) Need to push the already installed measures to the database +from sqlalchemy.orm import sessionmaker +from datetime import datetime + +BATCH_SIZE = 5000 +SOURCE = "peabody_import_2026_01" + +Session = sessionmaker(bind=db_engine) + + +def bulk_insert_installed_measures(installed_measures_df): + session = Session() + + records = [] + now = datetime.utcnow() + + for _, row in installed_measures_df.iterrows(): + records.append({ + "uprn": int(row["uprn"]), + "measure_type": row["measure_type"], + "installed_at": now, + "sap_points": float(row["sap_points"]) if pd.notna(row["sap_points"]) else None, + "carbon_savings": float(row["co2_equivalent_savings"]) if pd.notna(row["co2_equivalent_savings"]) else None, + "kwh_savings": float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None, + "bill_savings": float(row["energy_cost_savings"]) if pd.notna(row["energy_cost_savings"]) else None, + "heat_demand_savings": float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None, + "source": SOURCE, + "is_active": True, + }) + + try: + for i in range(0, len(records), BATCH_SIZE): + batch = records[i:i + BATCH_SIZE] + session.bulk_insert_mappings(InstalledMeasure, batch) + session.commit() + print(f"✅ Inserted {i + len(batch)} / {len(records)}") + + except Exception: + session.rollback() + raise + finally: + session.close() + + +# bulk_insert_installed_measures(installed_measures_df) ### Rebaselining - - from typing import Dict from sqlalchemy import func +from typing import 
Dict +from sqlalchemy import func, case + +REBASING_EXCLUDED_MEASURES = { + "mechanical_ventilation", +} + def get_installed_measure_adjustments_by_uprn_for_portfolio( session, portfolio_id: int, ) -> Dict[int, dict]: """ - Returns per-UPRN installed-measure adjustments. + Returns per-UPRN installed-measure adjustments for PROPERTY / EPC rebasing. + IMPORTANT: + - Mechanical ventilation is EXCLUDED from rebasing calculations + (drag-along measure; should not alter baseline EPC/SAP). + - All other installed measures are fully applied. + + Output shape: { uprn: { - sap_points: float, - co2: float, - energy_kwh: float, - energy_bill: float, - heat_demand: float, + "sap_points": float, + "co2": float, + "energy_kwh": float, + "energy_bill": float, + "heat_demand": float, } } """ + # -------------------------------------------- + # Limit to UPRNs that belong to this portfolio + # -------------------------------------------- uprn_subquery = ( session.query(PropertyModel.uprn) .filter(PropertyModel.portfolio_id == portfolio_id) @@ -459,24 +573,55 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( .subquery() ) + # -------------------------------------------- + # CASE helper: exclude ventilation from rebasing + # -------------------------------------------- + def exclude_ventilation(column): + return case( + ( + InstalledMeasure.measure_type.notin_( + REBASING_EXCLUDED_MEASURES + ), + column, + ), + else_=0.0, + ) + + # -------------------------------------------- + # Aggregate installed-measure impacts per UPRN + # -------------------------------------------- rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) - .label("sap_points"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.sap_points)), + 0.0, + ).label("sap_points"), - func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) - .label("co2"), + func.coalesce( + 
func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), + 0.0, + ).label("co2"), - func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) - .label("energy_kwh"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), + 0.0, + ).label("energy_kwh"), - func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) - .label("energy_bill"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), + 0.0, + ).label("energy_bill"), - func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) - .label("heat_demand"), + func.coalesce( + func.sum( + exclude_ventilation( + InstalledMeasure.heat_demand_savings + ) + ), + 0.0, + ).label("heat_demand"), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.uprn.in_(uprn_subquery)) @@ -484,6 +629,9 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( .all() ) + # -------------------------------------------- + # Shape result for downstream consumers + # -------------------------------------------- return { row.uprn: { "sap_points": float(row.sap_points), @@ -520,20 +668,27 @@ def get_installed_measure_types_by_uprn( def compute_property_sap_updates( properties: List[PropertyModel], - sap_adjustments: Dict[int, float], + sap_adjustments: Dict[int, float], # keyed by uprn ) -> List[dict]: """ Returns property SAP rebasing results. - Does NOT mutate DB objects. + ONLY returns rows where installed measures exist. 
""" updates = [] for prop in properties: - if prop.uprn is None or prop.original_sap_points is None: + if prop.uprn is None: continue - sap_delta = sap_adjustments.get(prop.uprn, 0.0) + # 🚨 gatekeeper + if prop.uprn not in sap_adjustments: + continue + + if prop.original_sap_points is None: + continue + + sap_delta = sap_adjustments[prop.uprn] new_sap = prop.original_sap_points + sap_delta updates.append({ @@ -542,7 +697,7 @@ def compute_property_sap_updates( "original_sap_points": prop.original_sap_points, "installed_sap_delta": sap_delta, "new_sap_points": new_sap, - "is_adjusted": sap_delta != 0, + "is_adjusted": True, }) return updates @@ -553,17 +708,13 @@ def compute_property_sap_updates( # ------------------------------------------------------------ def get_effective_plan_recommendations( - session, - plan_id: int, - excluded_measure_types: Set[str], + session, plan_id: int, excluded_measure_types: Set[str] ) -> List[Recommendation]: q = ( session.query(Recommendation) .join(PlanRecommendations) .filter(PlanRecommendations.plan_id == plan_id) - .filter(Recommendation.default.is_(True)) - ) - + .filter(Recommendation.default.is_(True))) if excluded_measure_types: q = q.filter( ~Recommendation.measure_type.in_(excluded_measure_types) @@ -587,7 +738,7 @@ def aggregate_plan_metrics(recommendations: list[Recommendation]): agg["sap_points"] += r.sap_points or 0.0 agg["co2_savings"] += r.co2_equivalent_savings or 0.0 agg["energy_bill_savings"] += r.energy_cost_savings or 0.0 - agg["energy_consumption_savings"] += r.energy_savings or 0.0 + agg["energy_consumption_savings"] += r.kwh_savings or 0.0 agg["valuation_increase"] += r.property_valuation_increase or 0.0 base_cost = r.estimated_cost or 0.0 @@ -601,49 +752,173 @@ def aggregate_plan_metrics(recommendations: list[Recommendation]): # PLAN REBASING (READ-ONLY) # ------------------------------------------------------------ +# session,plans,properties_by_id,epcs_by_property_id, property_sap_updates = session, plans, 
properties_by_id, epcs, +# property_updates_by_id + +from collections import defaultdict + + +def get_installed_measure_types_by_property_id_for_portfolio( + session, + portfolio_id: int, +) -> dict[int, set[str]]: + """ + Returns: + { property_id: {measure_type, ...} } + + Includes drag-along measures (e.g. ventilation). + """ + + rows = ( + session.query( + PropertyModel.id.label("property_id"), + InstalledMeasure.measure_type, + ) + .join( + InstalledMeasure, + InstalledMeasure.uprn == PropertyModel.uprn, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(InstalledMeasure.is_active.is_(True)) + .all() + ) + + installed_by_property: dict[int, set[str]] = defaultdict(set) + + for property_id, measure_type in rows: + mt = measure_type.value if hasattr(measure_type, "value") else measure_type + installed_by_property[property_id].add(mt) + + # drag-along rules + if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + installed_by_property[property_id].add("mechanical_ventilation") + + return installed_by_property + + +def get_all_default_plan_recommendations( + session, + plan_ids: list[int], +) -> dict[int, list[Recommendation]]: + """ + Returns {plan_id: [Recommendation, ...]} for ALL plans in one query. + """ + + rows = ( + session.query( + PlanRecommendations.plan_id, + Recommendation, + ) + .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + .filter(PlanRecommendations.plan_id.in_(plan_ids)) + .filter(Recommendation.default.is_(True)) + .all() + ) + + by_plan: dict[int, list[Recommendation]] = {} + + for plan_id, rec in rows: + by_plan.setdefault(plan_id, []).append(rec) + + return by_plan + + +def filter_remaining_recommendations( + recommendations: list[Recommendation], + installed_types: set[str], +) -> list[Recommendation]: + """ + Removes recommendations whose measure_type is already installed. 
+ """ + if not installed_types: + return recommendations + + return [ + r for r in recommendations + if ( + (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + not in installed_types + ) + ] + + def compute_plan_updates( session, plans: List[Plan], properties_by_id: Dict[int, PropertyModel], epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], - property_sap_updates: Dict[int, dict], + installed_types_by_property_id, + all_ventilation_measures ) -> List[dict]: """ - Computes plan metrics assuming properties are already rebased. + Computes plan metrics after marking some recommendations as already installed. + + Rules: + - Baseline post metrics remain unchanged + - Savings + costs are recomputed excluding installed measures + - ONLY mechanical ventilation alters post metrics """ - updates = [] + all_plan_recs = get_all_default_plan_recommendations( + session, + [p.id for p in plans], + ) - for plan in plans: + updates = [] + property_to_installed_types = {} + for plan in tqdm(plans, total=len(plans)): prop = properties_by_id.get(plan.property_id) epc = epcs_by_property_id.get(plan.property_id) - prop_update = property_sap_updates.get(plan.property_id) - if not prop or not epc or not prop_update: + if not prop or not epc: continue - installed_types = get_installed_measure_types_by_uprn( - session, prop.uprn - ) + installed_types = installed_types_by_property_id.get(prop.id, set()) - future_recs = get_effective_plan_recommendations( - session, - plan.id, + if not installed_types: + continue + + plan_recs = all_plan_recs.get(plan.id, []) + + remaining_recs = filter_remaining_recommendations( + plan_recs, installed_types, ) - metrics = aggregate_plan_metrics(future_recs) + remaining = aggregate_plan_metrics(remaining_recs) - baseline_bill = ( - epc.heating_cost_current - + epc.hot_water_cost_current - + epc.lighting_cost_current - + epc.appliances_cost_current - + epc.gas_standing_charge - + epc.electricity_standing_charge - ) + # Detect 
ventilation removal + ventilation_removed = "mechanical_ventilation" in installed_types - post_sap = prop_update["new_sap_points"] + metrics["sap_points"] + # ------------------------------- + # Start from the previous plan + # ------------------------------- + post_sap = plan.post_sap_points + post_co2 = plan.post_co2_emissions + post_bill = plan.post_energy_bill + post_kwh = plan.post_energy_consumption + + # ------------------------------- + # Undo ventilation ONLY + # ------------------------------- + ventilation_impact = all_ventilation_measures.get(prop.uprn, None) + + if ventilation_removed and ventilation_impact is not None: + # ventilation impact = baseline - remaining + + post_sap -= ventilation_impact["sap_points"] + + post_co2 += ventilation_impact["co2"] # We save more with ventilation + + post_bill += ventilation_impact["energy_bill"] + post_kwh += ventilation_impact["energy_kwh"] + + # # Skip if nothing changes at all + # if ( + # remaining["cost_of_works"] == baseline["cost_of_works"] + # and not ventilation_removed + # ): + # continue updates.append({ "plan_id": plan.id, @@ -654,40 +929,52 @@ def compute_plan_updates( "post_epc_rating": sap_to_epc(post_sap), # Carbon - "co2_savings": metrics["co2_savings"], - "post_co2_emissions": ( - epc.co2_emissions - metrics["co2_savings"] - if epc.co2_emissions is not None - else None - ), + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, # Energy bills - "energy_bill_savings": metrics["energy_bill_savings"], - "post_energy_bill": baseline_bill - metrics["energy_bill_savings"], + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, # Energy consumption - "energy_consumption_savings": metrics["energy_consumption_savings"], - "post_energy_consumption": ( - epc.primary_energy_consumption - - metrics["energy_consumption_savings"] - ), + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, - # 
Valuation - "valuation_increase": metrics["valuation_increase"], + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], "valuation_post_retrofit": ( - prop.current_valuation + metrics["valuation_increase"] + prop.current_valuation + + remaining["valuation_increase"] if prop.current_valuation is not None else None ), # Costs - "cost_of_works": metrics["cost_of_works"], - "contingency_cost": metrics["contingency_cost"], + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], }) + property_to_installed_types[prop.id] = installed_types + return updates +def build_installed_recommendation_pairs( + installed_types_by_property_id: dict[int, set[str]], +) -> list[tuple[int, str]]: + """ + Returns: + [(property_id, measure_type), ...] + """ + pairs = [] + + for property_id, measure_types in installed_types_by_property_id.items(): + for mt in measure_types: + pairs.append((property_id, mt)) + + return pairs + + def calculate_contingency_for_recommendation( recommendation, ) -> float: @@ -766,6 +1053,15 @@ def compute_epc_rebasing_updates( if not adj: continue + # if ( + # adj["sap_points"] == 0 + # and adj["co2"] == 0 + # and adj["energy_kwh"] == 0 + # and adj["energy_bill"] == 0 + # and adj["heat_demand"] == 0 + # ): + # continue + updates[property_id] = { "property_id": property_id, @@ -915,13 +1211,187 @@ def persist_epc_rebasing_updates( print(f"✅ Updated {len(epcs)} EPC records") +# For setting the original SAP, carbon, etc to the current values +def initialise_original_property_and_epc_values(portfolio_id: int): + """ + Initialise original_* columns for SAP + EPC. + Safe to re-run. Only fills NULL originals. 
+ """ + + with db_session() as session: + # ------------------------- + # PROPERTY (SAP) + # ------------------------- + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.original_sap_points.is_(None)) + .all() + ) + + for prop in properties: + prop.original_sap_points = prop.current_sap_points + + print(f"✅ Initialised original_sap_points for {len(properties)} properties") + + # ------------------------- + # EPC (energy / carbon) + # ------------------------- + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) + + epc_updates = 0 + + for epc in epcs: + updated = False + + if epc.original_co2_emissions is None: + epc.original_co2_emissions = epc.co2_emissions + updated = True + + if epc.original_primary_energy_consumption is None: + epc.original_primary_energy_consumption = ( + epc.primary_energy_consumption + ) + updated = True + + if epc.original_current_energy_demand is None: + epc.original_current_energy_demand = epc.current_energy_demand + updated = True + + if epc.original_current_energy_demand_heating_hotwater is None: + epc.original_current_energy_demand_heating_hotwater = ( + epc.current_energy_demand_heating_hotwater + ) + updated = True + + if updated: + epc_updates += 1 + + print(f"✅ Initialised EPC originals for {epc_updates} EPC records") + + session.commit() + + +from typing import Set, Dict +from sqlalchemy import distinct + +from typing import Dict +from sqlalchemy import func + + +def get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, + portfolio_id: int, +) -> Dict[int, dict]: + """ + Returns per-UPRN aggregated impact metrics for + already-installed MECHANICAL VENTILATION. 
+ + { + uprn: { + sap_points: float, + co2: float, + energy_kwh: float, + energy_bill: float, + heat_demand: float, + } + } + """ + + # Only consider UPRNs that belong to this portfolio + uprn_subquery = ( + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.uprn.isnot(None)) + .subquery() + ) + + rows = ( + session.query( + InstalledMeasure.uprn.label("uprn"), + + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) + .label("sap_points"), + + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) + .label("co2"), + + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) + .label("energy_kwh"), + + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) + .label("energy_bill"), + + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) + .label("heat_demand"), + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.measure_type == "mechanical_ventilation") + .filter(InstalledMeasure.uprn.in_(uprn_subquery)) + .group_by(InstalledMeasure.uprn) + .all() + ) + + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } + + +from sqlalchemy import update, tuple_ + + +def mark_recommendations_as_installed( + session, + property_measure_pairs: list[tuple[int, str]], + dry_run: bool = True, +): + if not property_measure_pairs: + print("No recommendations to update") + return + + print(f"{len(property_measure_pairs)} recommendation matches found") + + if dry_run: + print("DRY RUN — no database changes") + return + + stmt = ( + update(Recommendation) + .where( + tuple_(Recommendation.property_id, Recommendation.measure_type) + .in_(property_measure_pairs) + ) + .values(already_installed=True) + ) + + result = session.execute(stmt) + session.commit() + + print(f"✅ Updated 
{result.rowcount} recommendations") + + # ------------------------------------------------------------ # EXECUTION (DRY RUN) # ------------------------------------------------------------ -PORTFOLIO_ID = 430 +PORTFOLIO_ID = 431 # TODO - run the original sap points update on the peabody portfolio +# Initialising +# initialise_original_property_and_epc_values(PORTFOLIO_ID) + + with db_read_session() as session: properties = ( session.query(PropertyModel) @@ -929,6 +1399,9 @@ with db_read_session() as session: .all() ) + all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) + installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + plans = ( session.query(Plan) .filter(Plan.portfolio_id == PORTFOLIO_ID) @@ -974,10 +1447,255 @@ with db_read_session() as session: plans, properties_by_id, epcs, - property_updates_by_id, + installed_types_by_property_id, + all_ventilation_measures, ) + # Used to mark recommendations + pairs = build_installed_recommendation_pairs( + installed_types_by_property_id + ) + +from copy import deepcopy + +plan_updates_comparison = deepcopy(plan_updates) +plans_by_planid = {p.id: p for p in plans} +for u in plan_updates_comparison: + before = plans_by_planid.get(u["plan_id"]) + if not before: + continue + + u.update({ + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + + # Carbon + "before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + }) + +plan_updates_df = pd.DataFrame(plan_updates_comparison) + +plan_updates_df["delta_sap_points"] = ( + plan_updates_df["after_sap_points"] + - plan_updates_df["before_sap_points"] +) 
+plan_updates_df["delta_carbon"] = ( + plan_updates_df["after_post_co2_emissions"] + - plan_updates_df["before_post_co2_emissions"] +) +plan_updates_df["delta_cost_of_works"] = ( + plan_updates_df["after_cost_of_works"] + - plan_updates_df["before_cost_of_works"] +) +plan_updates_df["delta_contingency_cost"] = ( + plan_updates_df["after_contingency_cost"] + - plan_updates_df["before_contingency_cost"] +) + +# High-level sanity checks +summary = plan_updates_df[[ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", +]].sum() + +print(summary) + +# Grab some random samples +example = plan_updates_df[plan_updates_df["delta_cost_of_works"] < -1000].sample(1) +# example = plan_updates_df[plan_updates_df["delta_sap_points"] == 0].sample(1) +example = plan_updates_df[plan_updates_df["property_id"] == 434936].squeeze() + +print(example["property_id"]) +# Go the the db and get the UPRN +uprn_example = 202149883 +installed_adjustments[uprn_example] + +[x for x in plan_updates if x["property_id"] == example["property_id"].values[0]] + +installed_measures_example = {} + +example.squeeze() # When ready to run! -persist_property_sap_updates(property_updates_by_id) -persist_plan_updates(plan_updates) -persist_epc_rebasing_updates(epc_updates) +# persist_property_sap_updates(property_updates_by_id) +# persist_plan_updates(plan_updates) +# persist_epc_rebasing_updates(epc_updates) +# BATCH_SIZE = 1000 +# +# with db_session() as session: +# for i in range(0, len(pairs), BATCH_SIZE): +# batch = pairs[i:i + BATCH_SIZE] +# +# mark_recommendations_as_installed( +# session, +# batch, +# dry_run=False, +# ) +# +# session.commit() + +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/435084/ +# Current EPC rating should go to 68.6 - no it shouldn't! 
less + +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434930/ +# Should now be a C72, +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434930 +# Carbon should be 2.02, energy_kwh should be, 12311.5 + +# We need a follow-up query which switches off ventilation if ewi, iwi or cwi are already installed +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/435154/plans/1024673 +# Should go to C73 +# This is a good one to test also, marking the recommendation as non-default + +# Good example to check: +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434936/plans/1024455 +# Should go down by these: +# {'sap_points': 11.299999, 'co2': 1.85, 'energy_kwh': 7882.1997, 'energy_bill': 549.89935, 'heat_demand': 77.7} +# Before SAP: 55 +# Carbon 7.56 +# kwh: 28207 + +# Good example to check: +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434444/plans/1024063 +# SHould change by these +# {'sap_points': 10.3, 'co2': 2.54, 'energy_kwh': 3713.5, 'energy_bill': 1028.2682, 'heat_demand': 151.61} +# Current: SAP 54 +# Carbon: 4.45 +# kwh: 10307 + +# There's one final thing to do - we had an error in post carbon so we need to increase it by the appliances +# amount for all units +from backend.ml_models.AnnualBillSavings import AnnualBillSavings + + +# Need to add this on to the plan for each property +def calculate_appliance_carbon_tonnes(total_floor_area: float) -> float: + """ + Returns appliance carbon emissions in tonnes CO2. 
+ """ + appliance_energy_kwh = AnnualBillSavings.estimate_appliances_energy_use( + total_floor_area=total_floor_area + ) + + # kgCO2 → tonnes CO2 + appliance_carbon_tonnes = (appliance_energy_kwh * 0.232) / 1000 + return appliance_carbon_tonnes + + +from sqlalchemy.orm import joinedload +from tqdm import tqdm + +from tqdm import tqdm + + +def apply_appliance_carbon_to_plans( + session, + portfolio_id: int, + dry_run: bool = True, +): + """ + Adds appliance-related carbon emissions to plan.post_co2_emissions + using EPC total_floor_area. + """ + + # -------------------------------------------- + # Load EPCs (floor area source of truth) + # -------------------------------------------- + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .filter(PropertyDetailsEpcModel.total_floor_area.isnot(None)) + .all() + ) + + epc_by_property_id = { + e.property_id: e for e in epcs + } + + # -------------------------------------------- + # Load plans with post carbon + # -------------------------------------------- + plans = ( + session.query(Plan) + .filter(Plan.portfolio_id == portfolio_id) + .filter(Plan.post_co2_emissions.isnot(None)) + .all() + ) + + updates = [] + total_delta = 0.0 + + for plan in tqdm(plans, total=len(plans)): + epc = epc_by_property_id.get(plan.property_id) + if not epc: + continue + + floor_area = epc.total_floor_area + if not floor_area or floor_area <= 0: + continue + + delta = float(calculate_appliance_carbon_tonnes(floor_area)) + + if delta == 0: + continue + + updates.append((plan, delta)) + total_delta += delta + + # -------------------------------------------- + # Reporting + # -------------------------------------------- + print(f"Plans affected: {len(updates)}") + print(f"Total appliance carbon added (tCO2): {total_delta:.4f}") + + if dry_run: + print("🟡 DRY RUN — no updates applied") + return + + # -------------------------------------------- + # Apply updates + # 
-------------------------------------------- + for plan, delta in updates: + plan.post_co2_emissions += delta + + session.commit() + print("✅ Appliance carbon successfully applied") + + +# with db_session() as session: +# apply_appliance_carbon_to_plans( +# session, +# portfolio_id=PORTFOLIO_ID, +# dry_run=False, +# ) + +# Get all uprns for entries in already installed, from the database +with db_read_session() as session: + db_uprns = { + str(r[0]) + for r in ( + session.query(InstalledMeasure.uprn) + .all() + ) + } + +# What is the overlap of these properties and the properties in portfolo 430 +sal_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260107 " + "corrected batch 6 sal.xlsx", + sheet_name="batch 1", +) + +len(sal_data[sal_data["epc_os_uprn"].astype(str).isin(db_uprns)]["epc_os_uprn"]) + +# len([uprn for uprn, v in installed_adjustments.items() if str(uprn) in sal_data["epc_os_uprn"].astype(str).tolist()]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 83799eff..9c7b3d2f 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -75,6 +75,10 @@ df = df.sort_values("property_id", ascending=True) agg = df.groupby("property_id").size().reset_index(name="n_plans") agg = agg.sort_values("n_plans", ascending=True) + +agg[agg["n_plans"] == 3] +agg[agg["n_plans"] == 2].shape + agg[agg["n_plans"] != 3] assert all(agg["n_plans"] == 3) @@ -153,4 +157,54 @@ with pd.ExcelWriter(filename) as writer: sal.iloc[41000:61000, :].to_excel(writer, sheet_name="batch 4", index=False) sal.iloc[61000:81000, :].to_excel(writer, sheet_name="batch 5", index=False) - sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 5", index=False) + sal.iloc[81000:, 
:].to_excel(writer, sheet_name="batch 6", index=False) + +# TODO - mistake was made when creating the final SAL +b1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 1" +) +b2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 2" +) +b3 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 3" +) +b4 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 4" +) +b5 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 5" +) +# Batch 6 should be the remaining +total = pd.concat([b1, b2, b3, b4, b5]) +remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] +# Create new output +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx") + +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Top 1000 for testing + b1.to_excel(writer, sheet_name="batch 1", index=False) + # Batch 2 is the next 20,000 + b2.to_excel(writer, sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + b3.to_excel(writer, sheet_name="batch 3", index=False) + + b4.to_excel(writer, sheet_name="batch 4", index=False) + b5.to_excel(writer, sheet_name="batch 5", index=False) + remaining.to_excel(writer, sheet_name="batch 6", index=False) + +all_together = pd.concat( + [b1, b2, b3, b4, b5, remaining] +) diff --git a/etl/customers/peabody/Nov 2025 Consulting 
Project/i_testing_parity_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py new file mode 100644 index 00000000..c6fb86ea --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py @@ -0,0 +1,21 @@ +import pandas as pd + +df = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Parity Data 08012026.xlsx" +) + +df['SAP Score'].mean() + +df[~pd.isnull(df["Lodged EPC Score"])]["Lodged EPC Score"].mean() +df[~pd.isnull(df["Lodged EPC Score"])]["SAP Score"].mean() + +df['Difference'] = abs(df['SAP Score'] - df['Lodged EPC Score']) +df[~pd.isnull(df["Lodged EPC Score"])]["Difference"].mean() + +df["Lodged EPC Band"].value_counts(normalize=True) +df["SAP Band"].value_counts(normalize=True) + +z = df[df["SAP Band"] != df["Lodged EPC Band"]] +agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count") + +zz = z[z["Lodged EPC Band"] == "A"] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py b/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py new file mode 100644 index 00000000..370473a1 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py @@ -0,0 +1,7 @@ +import pandas as pd + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index 7b85ac49..bac20af4 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -72,9 +72,12 @@ class WindowsRecommendations: elif "secondary_glazing" in measures and "double_glazing" not in measures: is_secondary_glazing = True else: - is_secondary_glazing = self.property.restricted_measures or ( - self.property.windows["glazing_type"] == 
"secondary" + # If the property currently has some secondary glazing but isn't in a conservation area + # + is_secondary_glazing = self.property.restricted_measures and ( + self.property.data["windows-energy-eff"] in ["Poor", "Very Poor"] ) + windows_area = self.property.windows_area if not number_of_windows: @@ -200,6 +203,8 @@ class WindowsRecommendations: else: glazed_type_ending = "secondary glazing" new_windows_description = "Multiple glazing throughout" + # Windows only end up with an average efficiency + windows_energy_eff = "Average" else: raise ValueError("Invalid glazing type - implement me") @@ -208,7 +213,6 @@ class WindowsRecommendations: windows_energy_eff = "Very Good" # For post 2002 windows, the energy efficiency is "Good" and so for the simulation, we simulate with "Good" - windows_ending_config = WindowAttributes(new_windows_description).process() windows_simulation_config = check_simulation_difference( From b156513524b3883825a3b568c0d99204216fe47f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 11:48:56 +0000 Subject: [PATCH 168/202] updating windows description for secondary glazing replacement to double --- recommendations/WindowsRecommendations.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index bac20af4..917a1667 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -78,15 +78,17 @@ class WindowsRecommendations: self.property.data["windows-energy-eff"] in ["Poor", "Very Poor"] ) - windows_area = self.property.windows_area + # We check if the windows are partially insulated but we're recommending double glazing as a complete + # replacement + double_glazing_replacement = ( + not is_secondary_glazing and + # As defined in coverage_map in windows attributes + self.property.windows["glazing_coverage"] in ["partial", "most"] + ) if not 
number_of_windows: raise ValueError("Number of windows not specified") - if windows_area is not None: - # TODO - we don't have a price for this so we can't recommend it - print("We have windows area, we should use this data for our recommendations!!!") - # We scale the number of windows based on the proportion of existing glazing if self.property.data["multi-glaze-proportion"] != "": @@ -118,7 +120,10 @@ class WindowsRecommendations: is_secondary_glazing=is_secondary_glazing, ) - already_installed = "windows_glazing" in self.property.already_installed + measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing" + + already_installed = measure_type in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) description = "The property already has double glazing installed. No further action is required." @@ -126,7 +131,7 @@ class WindowsRecommendations: glazing_type = ( "secondary glazing" if is_secondary_glazing else "double glazing" ) - if self.property.windows["glazing_coverage"] in ["partial", "most"]: + if self.property.windows["glazing_coverage"] in ["partial", "most"] and not double_glazing_replacement: description = f"Install {glazing_type} to the remaining windows" else: description = f"Install {glazing_type} to all windows" @@ -234,8 +239,6 @@ class WindowsRecommendations: "glazed-type": glazed_type_ending, } - measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing" - non_invasive_recommendation = next( (r for r in self.property.non_invasive_recommendations if r["type"] in ["windows_glazing", measure_type]), {} From 808a5122ee8b1b73677eeb4ce3d37421e115c36c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 14:59:46 +0000 Subject: [PATCH 169/202] added re-baselining to the property model --- backend/Property.py | 15 ++++- .../db/functions/recommendations_functions.py | 1 - backend/engine/engine.py | 51 +++++++++------ 
backend/ml_models/api.py | 3 +- recommendations/HeatingRecommender.py | 16 +++-- recommendations/Recommendations.py | 64 ++++++++++++++++++- 6 files changed, 118 insertions(+), 32 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 70a70307..7df947ce 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -800,13 +800,19 @@ class Property: to_update[k] = None return to_update - def get_full_property_data(self, current_valuation=None): + def get_full_property_data(self, current_valuation=None, needs_rebaselining=False, rebaselining_sap=0): """ This method extracts the data which is pushed to the database, containing core information, from the EPC about a property :return: """ + current_sap_rating = self.data["current-energy-efficiency"] + if needs_rebaselining: + current_sap_rating += rebaselining_sap + + current_epc_rating = sap_to_epc(current_sap_rating) + property_data = { "creation_status": "READY", "uprn": int(self.data["uprn"]), @@ -823,9 +829,12 @@ class Property: "number_of_rooms": self.number_of_rooms, "year_built": self.year_built, "tenure": self.data["tenure"], - "current_epc_rating": self.data["current-energy-rating"], - "current_sap_points": self.data["current-energy-efficiency"], + "current_epc_rating": current_epc_rating, + "current_sap_points": current_sap_rating, "current_valuation": current_valuation, + "original_sap_points": self.data["current-energy-efficiency"], + "is_sap_points_adjusted_for_installed_measures": needs_rebaselining, + "installed_measures_sap_point_adjustment": rebaselining_sap, } property_data = self._clean_upload_data(property_data) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 4fdd9324..7d448aa0 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -27,7 +27,6 @@ def prepare_plan_data( """ # Plan carbon savings co2_savings = 
sum([r["co2_equivalent_savings"] for r in default_recommendations]) - raise Exception("CHECK ME") post_co2_emissions = p.energy["co2_emissions"] - co2_savings # Plan bill savings diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f4e3ad3f..e0c5fdb7 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -929,9 +929,7 @@ async def model_engine(body: PlanTriggerRequest): # any panel performance, we ensure that we have a 3kWp and 4kWp option for the property logger.info("Identifying property recommendations") - recommendations = {} - recommendations_scoring_data = [] - representative_recommendations = {} + recommendations, recommendations_scoring_data, representative_recommendations = {}, [], {} for p in tqdm(input_properties): # We set the ECO package data, if we have it property_eco_package = eco_packages.get(p.id, (None, None, None)) @@ -965,17 +963,15 @@ async def model_engine(body: PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) logger.info("Preparing data for scoring in sap change api") - recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] + ) # Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True - recommendations_scoring_data["sap_starting"] = 77 - - recommendations_scoring_data = recommendations_scoring_data.drop( - columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending"] - ) - all_predictions = await model_api.async_paginated_predictions( data=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET, @@ -1015,19 +1011,19 @@ async def model_engine(body: PlanTriggerRequest): # We now insert kwh estimates and costs into the recommendations 
logger.info("Calculating tenant savings - kwh and bills") - for property_id in tqdm([p.id for p in input_properties]): + for p in tqdm(input_properties): + property_id = p.id property_recommendations = recommendations.get(property_id, []) - property_instance = [p for p in input_properties if p.id == property_id][0] property_current_energy_bill = ( Recommendations.calculate_recommendation_tenant_savings( - property_instance=property_instance, + property_instance=p, kwh_simulation_predictions=kwh_simulation_predictions, property_recommendations=property_recommendations, ashp_cop=body.ashp_cop ) ) - property_instance.current_energy_bill = property_current_energy_bill + p.current_energy_bill = property_current_energy_bill # Insert the predictions into the recommendations and run the optimiser logger.info("Optimising measures") @@ -1195,23 +1191,40 @@ async def model_engine(body: PlanTriggerRequest): property_updates, property_epc_details, property_spatial_updates = [], [], [] plans_to_create, recommendations_to_create = [], [] - # TODO: Check the update to carbon - print("NEED TO CHECK THE UPDATE TO CARBON") # Prepare the data that will need to be uploaded in bulk for p in input_properties: recommendations_for_property = recommendations.get(p.id, []) default_recommendations = [r for r in recommendations_for_property if r["default"]] + + # We need to: + # Get already installed measures + already_installed_default = [r for r in default_recommendations if r["already_installed"]] + # Property should be have increased SAP + needs_rebaselining = bool(len(already_installed_default)) + rebaselining_sap = float(sum([r["sap_points"] for r in already_installed_default])) + rebaselining_carbon = float(sum([r["co2_equivalent_savings"] for r in already_installed_default])) + rebaselining_heat_demand = float(sum([r["heat_demand"] for r in already_installed_default])) + rebaselining_kwh = float(sum([r["kwh_savings"] for r in already_installed_default])) + rebaselining_bills = 
float(sum([r["energy_cost_savings"] for r in already_installed_default])) + # TODO - gotta apply the adjustments to the property table, and the property_details_epc table + + # This will include everything, including already installed total_sap_points = sum([r["sap_points"] for r in default_recommendations]) new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points new_epc = sap_to_epc(new_sap_points) - total_cost = sum([r["total"] for r in default_recommendations]) + # Already installed measures do not have a cost but we remove anyway + total_cost = sum([r["total"] for r in default_recommendations if not r["already_installed"]]) valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc, total_cost=total_cost) # --- property-level updates (always) --- property_updates.append({ "property_id": p.id, "portfolio_id": body.portfolio_id, - "data": p.get_full_property_data(current_valuation=valuations["current_value"]) + "data": p.get_full_property_data( + current_valuation=valuations["current_value"], + needs_rebaselining=needs_rebaselining, + rebaselining_sap=rebaselining_sap, + ) }) property_epc_details.append(p.get_property_details_epc(portfolio_id=body.portfolio_id)) diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index 7f3e5873..daf4b715 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -142,7 +142,8 @@ class ModelApi: @staticmethod def extract_phase(recommendation_id): if 'phase=' in recommendation_id: - return int(recommendation_id.split('phase=')[1][0]) + extracted = recommendation_id.split('phase=')[1] + return int(extracted.strip()) else: return None diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index fdc25bf9..15a7b0b0 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1,5 +1,6 @@ import re import backend.app.assumptions as assumptions +from etl.customers.immo.pilot.asset_list import 
already_installed from recommendations.recommendation_utils import ( check_simulation_difference, override_costs, combine_recommendation_configs ) @@ -320,12 +321,6 @@ class HeatingRecommender: measures = MEASURE_MAP["heating"] if measures is None else measures - # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace - # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this - # in the Costs class, stored as SYSTEM_FLUSH_COST - - # TODO: Right now, we don't have recommendations for electric boilers - we should probably have one - # if we have a non-invasive ashp recommendation, we get the configuration directly from the property instance non_invasive_ashp_recommendation = next( (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"), @@ -1115,6 +1110,7 @@ class HeatingRecommender: "hot-water-energy-eff": heating_simulation_config["hot_water_energy_eff_ending"] } + # TODO: Probably don't need to use this for HHRSH - simplify recommendations = self.combine_heating_and_controls( controls_recommendations=controls_recommender.recommendation, heating_simulation_config=heating_simulation_config, @@ -1128,6 +1124,12 @@ class HeatingRecommender: non_intrusive_recommendation=non_intrusive_recommendation, heating_product=hhrsh_product ) + + # Check if HHRSH are already installed + already_installed = "high_heat_retention_storage_heaters" in self.property.already_installed + for rec in recommendations: + rec["already_installed"] = already_installed + if _return: return recommendations @@ -1347,7 +1349,7 @@ class HeatingRecommender: n_rooms=self.property.number_of_rooms ) - already_installed = "heating" in self.property.already_installed + already_installed = "boiler_upgrade" in self.property.already_installed if already_installed: boiler_costs = override_costs(boiler_costs) description = "Heating system has already been upgraded, no 
further action needed." diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 29ba267a..e1d63592 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -272,6 +272,36 @@ class Recommendations: property_recommendations.append(self.solar_recommender.recommendation) phase += 1 + if self.property_instance.already_installed: + # We need to re-shuffle our measures + property_recommendations_removed_installed = [] + already_installed_recs = [] + for recs in property_recommendations: + phase_recs = [] + phase_already_installed_recs = [] + for rec in recs: + if rec["already_installed"]: + phase_already_installed_recs.append(rec) + else: + phase_recs.append(rec) + if phase_recs: + property_recommendations_removed_installed.append(phase_recs) + if phase_already_installed_recs: + already_installed_recs.append(phase_already_installed_recs) + + # We re-set the phases + for i, recs in enumerate(property_recommendations_removed_installed): + for rec in recs: + rec["phase"] = i + # already installed recs get negative phasing + already_installed_phase = -len(already_installed_recs) + for recs in already_installed_recs: + for rec in recs: + rec["phase"] = already_installed_phase + already_installed_phase += 1 + + property_recommendations = already_installed_recs + property_recommendations_removed_installed + # We insert temporary ids into the recommendations which is important for the optimiser later property_recommendations = self.insert_temp_recommendation_id(property_recommendations) @@ -486,6 +516,11 @@ class Recommendations: mv_increasing_variables = ["carbon", "heat_demand"] mv_decreasing_variables = ["sap"] + # We allow for negative phase + starting_phase = min( + rec["phase"] for recs in property_recommendations for rec in recs + ) + impact_summary = [] for recommendations_by_type in property_recommendations: for rec in recommendations_by_type: @@ -526,7 +561,7 @@ class Recommendations: # We structure 
this so that depending on the phase, we capture the previous phase impacts and # then just have one piece of code to calculate the difference - if rec["phase"] == 0: + if rec["phase"] == starting_phase: # These are just the starting values, from the EPC. When we score the ML models, # heating_cost_starting and heating_cost_ending are just the values in the EPC. However, with # heating_cost_ending, we expect that the EPC will predict a heating cost based on what would happen @@ -954,6 +989,33 @@ class Recommendations: pd.isnull(kwh_impact_table["hotwater_fuel_type"]).sum()): raise Exception("Fuel type is missing") + # As one final adjustment, if we + # 1) have a boiler upgrade recommendation + # 2) Have an average efficiency boiler, we adjust the COP of the existing boiler down to 75% + heating_upgrades = [x for x in property_recommendations if x[0]["type"] == "heating"] + boiler_upgrade = [r for recs in heating_upgrades for r in recs if r["measure_type"] == "boiler_upgrade"] + existing_heating_efficiency = property_instance.data["mainheat-energy-eff"] + + if len(boiler_upgrade) and existing_heating_efficiency in ["Very Poor", "Poor", "Average"]: + efficiency_map = {"Very Poor": 0.6, "Poor": 0.65, "Average": 0.7} + adjusted_cop = efficiency_map[existing_heating_efficiency] + boiler_phase = boiler_upgrade[0]["phase"] + heating_measure_types_to_id = [ + {"recommendation_id": r["recommendation_id"], "measure_type": r["measure_type"]} + for r in heating_upgrades[0] + ] + kwh_impact_table = kwh_impact_table.merge( + pd.DataFrame(heating_measure_types_to_id), how="left", on="recommendation_id" + ) + for col in ["heating_cop", "hotwater_cop"]: + kwh_impact_table[col] = np.where( + (kwh_impact_table["phase"] <= boiler_phase) & + (kwh_impact_table["heating_fuel_type"] == "Natural Gas") & + (kwh_impact_table["measure_type"] != "boiler_upgrade"), + adjusted_cop, kwh_impact_table[col] + ) + kwh_impact_table = kwh_impact_table.drop(columns=["measure_type"]) + # We now calculate 
the fuel cost for k in ["heating", "hotwater"]: kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply( From 3fe102c385ceb543d3c4361d9f7ba4d5e18e51f3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 18:52:26 +0000 Subject: [PATCH 170/202] added rebaselining for installed measures --- backend/Property.py | 37 +++++++-- .../db/functions/recommendations_functions.py | 31 +++++--- backend/engine/engine.py | 76 +++++++++++++++++-- recommendations/Recommendations.py | 2 +- .../optimiser/funding_optimiser.py | 20 +++-- 5 files changed, 136 insertions(+), 30 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 7df947ce..e0bc2199 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -852,7 +852,10 @@ class Property: else None ) - def get_property_details_epc(self, portfolio_id: int): + def get_property_details_epc( + self, portfolio_id: int, needs_rebaselining: bool = False, rebaselining_carbon: float = 0, + rebaselining_heat_demand: float = 0, rebaselining_kwh: float = 0, rebaselining_bills: float = 0 + ): if self.current_energy_bill is None: raise ValueError("Current energy bill has not been set") @@ -875,6 +878,19 @@ class Property: # We check if the lodgement date is more than 10 years old is_expired = (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650) + # Handle re-baselining + co2_emissions = self.energy["co2_emissions"] + primary_energy_consumption = self.energy["primary_energy_consumption"] + current_kwh_demand = self.current_energy_consumption + current_kwh_heating_hotwater = self.current_energy_consumption_heating_hotwater + if needs_rebaselining: + # Carbon will be reduced + co2_emissions -= rebaselining_carbon + # Heat demand will be reduced + primary_energy_consumption -= rebaselining_heat_demand + current_kwh_demand -= rebaselining_kwh + current_kwh_heating_hotwater -= rebaselining_kwh + property_details_epc = { "property_id": self.id, "portfolio_id": portfolio_id, @@ -911,16 +927,25 
@@ class Property: "number_of_storeys": self.number_of_storeys["number_of_storeys"], "mains_gas": self.mains_gas, "energy_tariff": self.data["energy-tariff"], - "primary_energy_consumption": self.energy["primary_energy_consumption"], - "co2_emissions": self.energy["co2_emissions"], - "current_energy_demand": self.current_energy_consumption, - "current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, + "primary_energy_consumption": primary_energy_consumption, + "co2_emissions": co2_emissions, + "current_energy_demand": current_kwh_demand, # This is kwh - naming is confusing + "current_energy_demand_heating_hotwater": current_kwh_heating_hotwater, # This is kwh "estimated": self.data.get("estimated", False), # We indicate if we've overwritten a SAP 05 EPC "sap_05_overwritten": sap_05_overwritten, "sap_05_score": sap_05_score, "sap_05_epc_rating": sap_05_epc_rating, - **self.current_energy_bill + **self.current_energy_bill, + "original_co2_emissions": self.energy["co2_emissions"], + "original_primary_energy_consumption": self.energy["primary_energy_consumption"], + "original_current_energy_demand": self.current_energy_consumption, # Bad naming, this is kwh + "original_current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, # kwh + "installed_measures_co2_adjustment": rebaselining_carbon, + "installed_measures_energy_demand_adjustment": rebaselining_kwh, # kwh + "installed_measures_total_energy_bill_adjustment": rebaselining_bills, + "installed_measures_heat_demand_adjustment": rebaselining_heat_demand, + "is_epc_adjusted_for_installed_measures": needs_rebaselining, } return property_details_epc diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 7d448aa0..51562f55 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -10,7 +10,8 @@ from 
backend.app.db.connection import db_session, db_read_session def prepare_plan_data( - p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations, + rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0, ): """ Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured @@ -23,19 +24,29 @@ def prepare_plan_data( :param new_sap_points: sap points, post default recommendations :param new_epc: new epc rating, post default recommendations :param default_recommendations: list of default recommendations for a property + :param rebaselining_carbon: carbon emissions adjustment for rebaselining + :param rebaselining_heat_demand: heat demand adjustment for rebaselining + :param rebaselining_kwh: kwh consumption adjustment for rebaselining + :param rebaselining_bills: energy bill adjustment for rebaselining :return: """ # Plan carbon savings - co2_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) - post_co2_emissions = p.energy["co2_emissions"] - co2_savings + co2_savings = sum( + [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)] + ) + post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings # Plan bill savings - energy_bill_savings = sum([r["energy_cost_savings"] for r in default_recommendations]) - post_energy_bill = sum(p.current_energy_bill.values()) - energy_bill_savings + energy_bill_savings = sum( + [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)] + ) + post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings # energy consumption - energy_consumption_savings = sum([r["kwh_savings"] for r in default_recommendations]) - post_energy_consumption = 
p.current_energy_consumption - energy_consumption_savings + energy_consumption_savings = sum( + [r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)] + ) + post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings valuation_post_retrofit, valuation_increase = None, None if valuations["current_value"]: @@ -43,8 +54,10 @@ def prepare_plan_data( valuation_post_retrofit = valuations["average_increased_value"] # plan costing data - cost_of_works = sum([r["total"] for r in default_recommendations]) - contingency_cost = sum([r.get("contingency", 0) for r in default_recommendations]) + cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)]) + contingency_cost = sum( + [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)] + ) return { "portfolio_id": body.portfolio_id, diff --git a/backend/engine/engine.py b/backend/engine/engine.py index e0c5fdb7..740b9581 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -525,6 +525,22 @@ def extract_address_data(config, body): return uprn, address1, full_address +def keep_max_sap_per_measure_type(items): + # First pass: find max sap_points per measure_type + max_by_type = {} + for item in items: + t = item["measure_type"] + max_by_type[t] = max(max_by_type.get(t, float("-inf")), item["sap_points"]) + + # Second pass: keep only items matching the max for their type + output = [] + for measure_type, points in max_by_type.items(): + to_consider = [x for x in items if x["measure_type"] == measure_type and x["sap_points"] == points] + output.append(to_consider[0]) # pick the first one in case of ties + + return output + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) @@ -1063,8 +1079,33 @@ async def model_engine(body: PlanTriggerRequest): 
(r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], r["uplift_project_score"]) = (0, 0, 0, 0) + already_installed_measures = [] + for measures in measures_to_optimise_with_uplift: + for m in measures: + # A) We're going to make the already installed measures default + # B) We need to SAP points for all already installed measures to avoid double counting + if m["already_installed"]: + already_installed_measures.append( + { + "id": m["recommendation_id"], + "measure_type": m["measure_type"], + "sap_points": m["sap_points"], + } + ) + + # We get the ones with the highest SAP + default_already_installed = keep_max_sap_per_measure_type(already_installed_measures) + already_installed_sap = float(sum(d["sap_points"] for d in default_already_installed)) + + # Remove them from the optimisation pool + finalised_measures_to_optimise = [] + for m in measures_to_optimise_with_uplift: + filtered = [x for x in m if not x["already_installed"]] + if filtered: + finalised_measures_to_optimise.append(filtered) + input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True, + finalised_measures_to_optimise, body.goal, needs_ventilation, funding=True, property_eco_packages=eco_packages.get(p.id) ) @@ -1075,9 +1116,10 @@ async def model_engine(body: PlanTriggerRequest): p=p, input_measures=input_measures, budget=body.budget, - target_gain=gain, + target_gain=gain - already_installed_sap, enforce_heat_pump_insulation=True, - enforce_fabric_first=body.enforce_fabric_first + enforce_fabric_first=body.enforce_fabric_first, + already_installed_sap=already_installed_sap, # To be passed to output ) # if handle the empty case @@ -1120,7 +1162,8 @@ async def model_engine(body: PlanTriggerRequest): ) battery_sap_score = BatterySAPScorer.score(starting_sap=post_sap, pv_size=pv_size) - selected = {r["id"] for r in solution} + # We add the defauly already installed measures to the solution + 
selected = {r["id"] for r in solution + default_already_installed} if property_required_measures: solution = optimiser_functions.add_required_measures( @@ -1206,7 +1249,6 @@ async def model_engine(body: PlanTriggerRequest): rebaselining_heat_demand = float(sum([r["heat_demand"] for r in already_installed_default])) rebaselining_kwh = float(sum([r["kwh_savings"] for r in already_installed_default])) rebaselining_bills = float(sum([r["energy_cost_savings"] for r in already_installed_default])) - # TODO - gotta apply the adjustments to the property table, and the property_details_epc table # This will include everything, including already installed total_sap_points = sum([r["sap_points"] for r in default_recommendations]) @@ -1227,7 +1269,16 @@ async def model_engine(body: PlanTriggerRequest): ) }) - property_epc_details.append(p.get_property_details_epc(portfolio_id=body.portfolio_id)) + property_epc_details.append( + p.get_property_details_epc( + portfolio_id=body.portfolio_id, + needs_rebaselining=needs_rebaselining, + rebaselining_carbon=rebaselining_carbon, + rebaselining_heat_demand=rebaselining_heat_demand, + rebaselining_kwh=rebaselining_kwh, + rebaselining_bills=rebaselining_bills, + ) + ) property_spatial_updates.append({"uprn": p.uprn, "data": p.spatial}) @@ -1236,7 +1287,18 @@ async def model_engine(body: PlanTriggerRequest): continue plan_data = db_funcs.recommendations_functions.prepare_plan_data( - p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations + p=p, + body=body, + scenario_id=scenario_id, + eco_packages=eco_packages, + valuations=valuations, + new_sap_points=new_sap_points, + new_epc=new_epc, + default_recommendations=default_recommendations, + rebaselining_carbon=rebaselining_carbon, + rebaselining_heat_demand=rebaselining_heat_demand, + rebaselining_kwh=rebaselining_kwh, + rebaselining_bills=rebaselining_bills, ) plans_to_create.append({"property_id": p.id, "plan_data": plan_data}) diff --git 
a/recommendations/Recommendations.py b/recommendations/Recommendations.py index e1d63592..2466ea4e 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -298,7 +298,7 @@ class Recommendations: for recs in already_installed_recs: for rec in recs: rec["phase"] = already_installed_phase - already_installed_phase += 1 + already_installed_phase += 1 property_recommendations = already_installed_recs + property_recommendations_removed_installed diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 1d4fc682..f9e471ce 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -643,7 +643,8 @@ def optimise_with_scenarios( budget=None, target_gain=None, enforce_heat_pump_insulation=True, - enforce_fabric_first=False + enforce_fabric_first=False, + already_installed_sap=0 ): """ Scenario-based optimiser (funding-agnostic). @@ -754,7 +755,11 @@ def optimise_with_scenarios( heat_pump_paths = build_heat_pump_paths(remaining_wall_measures, remaining_roof_measures) paths.extend(heat_pump_paths) - fixed_selections = expand_funding_path(optimisation_measures, paths) + fixed_selections = [] + for path in paths: + result = expand_funding_path(input_measures, [path]) + if result: + fixed_selections.extend(result) for fixed in fixed_selections: @@ -825,7 +830,7 @@ def optimise_with_scenarios( "already_installed_gain": sum([x["gain"] for x in picked if x["already_installed"]]) }) - solutions_df = append_solution_metrics(solutions, target_gain, p) + solutions_df = append_solution_metrics(solutions, target_gain, p, already_installed_sap) return solutions_df @@ -835,12 +840,14 @@ def _get_ending_sap_without_battery(x): return float(sum(gain)) -def append_solution_metrics(solutions, target_gain, p): +def append_solution_metrics(solutions, target_gain, p, already_installed_sap=0): """ Given a set of solutions, this function will return a 
dataframe, with cost metrics appended, to allow the end user to select the optimal solution. :param solutions: :param target_gain: + :param p: + :param already_installed_sap: :return: """ @@ -852,7 +859,7 @@ def append_solution_metrics(solutions, target_gain, p): # Given the scheme, we now check if the packages are eligible. If they *are* eligible, but they don't meet the # final upgrade target, we then look to perform a final optimisation pass to meet the target gain. - solutions_df["meets_upgrade_target"] = solutions_df["total_gain"] >= target_gain - 0.1 + solutions_df["meets_upgrade_target"] = solutions_df["total_gain"] >= target_gain # We now can calculate the project ABS, which subtracts from the cost, but this is only relevant for ECO4 # We flag projects that are including batteries solutions_df["has_battery"] = solutions_df["items"].apply(has_battery) @@ -863,7 +870,7 @@ def append_solution_metrics(solutions, target_gain, p): # We need the ending SAP, but we'll need to remove the battery SAP uplift first solutions_df["ending_sap_without_battery"] = solutions_df.apply( - lambda x: int(p.data["current-energy-efficiency"]) + _get_ending_sap_without_battery(x), + lambda x: int(p.data["current-energy-efficiency"]) + already_installed_sap + _get_ending_sap_without_battery(x), axis=1 ) @@ -1015,7 +1022,6 @@ def expand_funding_path(input_measures, path_spec): cands = iter_and_candidates(input_measures, elem["AND"]) else: raise ValueError("unknown path element; expected 'OR' or 'AND'") - if not cands: return [] From 1b3aa926715f23506ff15965b5723fea62791cee Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 21:21:18 +0000 Subject: [PATCH 171/202] already installed working --- backend/Property.py | 4 +--- backend/engine/engine.py | 1 - .../g_rebaselining_installed_measrues.py | 15 ++++++++------- recommendations/WallRecommendations.py | 15 +++++++++++++-- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/backend/Property.py 
b/backend/Property.py index e0bc2199..49dc15d0 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -469,10 +469,8 @@ class Property: # It means we've recommended HHR with electric immersion, and shouldn't overwrite # the hot water description continue + # Set the new value otherwise as it's due to already installed measures - do nothing - raise NotImplementedError( - "Already have this key in the phase_epc_transformation - implement me" - ) phase_epc_transformation[k] = v simulation_epc.update(phase_epc_transformation) self.simulation_epcs[rec["recommendation_id"]] = simulation_epc diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 740b9581..4a503a08 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1073,7 +1073,6 @@ async def model_engine(body: PlanTriggerRequest): # We insert the innovation uplift measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) - # TODO: Turn this into a function and store the innovaiton uplift for group in measures_to_optimise_with_uplift: for r in group: (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index 8d4bc9da..4405d113 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -196,7 +196,7 @@ sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] ) sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( - ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] + ["Secondary"] ) 
sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( @@ -262,8 +262,9 @@ loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ double_glazing_conflicting = comparison[ - (comparison["double_glazing"]) & - (pd.isnull(comparison["double_glazing_from_recs"]) == False) + (comparison["double_glazing"] | comparison["secondary_glazing"]) & + (pd.isnull(comparison["double_glazing_from_recs"]) == False) & + (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) ].copy() double_glazing_conflicting["conflict_double_glazing"] = True secondary_glazing_conflicting = comparison[ @@ -445,10 +446,10 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) ) -installed_measures_df = add_mechanical_ventilation_for_fabric( - installed_measures_df, - recs_with_uprn -) +# installed_measures_df = add_mechanical_ventilation_for_fabric( +# installed_measures_df, +# recs_with_uprn +# ) assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 49483d2f..e1770838 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -8,6 +8,7 @@ from datatypes.enums import QuantityUnits from backend.Property import Property from backend.app.plan.schemas import MEASURE_MAP from BaseUtility import Definitions +from etl.customers.vander_elliot.non_intrusives import already_installed from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, @@ -641,8 +642,18 @@ class WallRecommendations(Definitions): # we separate the logic for for recommending them, therefore we don't # consider diminishing returns between the two as they are considered to be separate measures + 
prop_already_installed = self.property.already_installed + # So, we'll end up with problems if e.g. an external wall insulation is already installed and we try and + # recommend internal wall insulation. To avoid this, we check if either measure is already installed + # and: + # 1) If EWI is installed, we don't recommend IWI + # 2) If IWI is installed, we don't recommend EWI + # We only produce the recommendation for the moment, for the purpose of re-baselining + ewi_recommendations = [] - if self.ewi_valid() and "external_wall_insulation" in measures: + if self.ewi_valid() and "external_wall_insulation" in measures and ( + "internal_wall_insulation" not in prop_already_installed + ): ewi_recommendations = self._find_insulation( u_value=u_value, insulation_materials=pd.DataFrame( @@ -653,7 +664,7 @@ class WallRecommendations(Definitions): ) iwi_recommendations = [] - if "internal_wall_insulation" in measures: + if "internal_wall_insulation" in measures and "external_wall_insulation" not in prop_already_installed: iwi_recommendations = self._find_insulation( u_value=u_value, insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials), From 5dd29ee1c6620255549be6501fa104e38e7ffeb5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 21:43:15 +0000 Subject: [PATCH 172/202] removed stupid import --- recommendations/HeatingRecommender.py | 1 - recommendations/WallRecommendations.py | 1 - 2 files changed, 2 deletions(-) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 15a7b0b0..ea3056ba 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1,6 +1,5 @@ import re import backend.app.assumptions as assumptions -from etl.customers.immo.pilot.asset_list import already_installed from recommendations.recommendation_utils import ( check_simulation_difference, override_costs, combine_recommendation_configs ) diff --git 
a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index e1770838..284d1d2a 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -8,7 +8,6 @@ from datatypes.enums import QuantityUnits from backend.Property import Property from backend.app.plan.schemas import MEASURE_MAP from BaseUtility import Definitions -from etl.customers.vander_elliot.non_intrusives import already_installed from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, From ef7490fe44d0884885bb695897cf828b93749d87 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 10 Jan 2026 22:56:30 +0000 Subject: [PATCH 173/202] fixed typing bug --- backend/Property.py | 2 +- backend/app/assumptions.py | 2 ++ .../Nov 2025 Consulting Project/h_reset_estimated_epcs.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 49dc15d0..0df29405 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -805,7 +805,7 @@ class Property: :return: """ - current_sap_rating = self.data["current-energy-efficiency"] + current_sap_rating = float(self.data["current-energy-efficiency"]) if needs_rebaselining: current_sap_rating += rebaselining_sap diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 97a1df76..19263bb3 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -93,6 +93,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Community scheme with CHP, mains gas": {"fuel": "Natural Gas", "cop": 0.85}, "Air source heat pump, radiators and underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Electric ceiling heating, radiators, electric": {"fuel": "Electricity", "cop": 1}, + "Boiler and underfloor heating, mains gas, Boiler and radiators, 
mains gas": {"fuel": "Natural Gas", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 9c7b3d2f..d22d0f9e 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -5,7 +5,7 @@ from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Plan -PORTFOLIO_ID = 431 +PORTFOLIO_ID = 433 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 From bae3e13e219b47c04acbc5bcdf02262a9c8faab4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Jan 2026 13:51:28 +0000 Subject: [PATCH 174/202] minor peabody output work --- .../i_testing_parity_data.py | 74 +++++- .../k_deck_stats.py | 236 ++++++++++++++++++ sfr/principal_pitch/2_export_data.py | 27 +- 3 files changed, 317 insertions(+), 20 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py index c6fb86ea..41613bc3 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py @@ -1,8 +1,10 @@ import pandas as pd df = pd.read_excel( - "/Users/khalimconn-kowlessar/Downloads/Parity Data 08012026.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx" ) 
+df["wall_combined"] = df["Wall Construction"] + "+" + df["Wall Insulation"].fillna("Unknown Insulation") df['SAP Score'].mean() @@ -18,4 +20,72 @@ df["SAP Band"].value_counts(normalize=True) z = df[df["SAP Band"] != df["Lodged EPC Band"]] agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count") -zz = z[z["Lodged EPC Band"] == "A"] +recommendations_epc_c = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) +recommendations_epc_c["uprn"] = recommendations_epc_c["uprn"].astype(int).astype(str) + +combined = recommendations_epc_c.merge( + df, + left_on="uprn", + right_on="UPRN", + suffixes=("_rec", "_sal") +) + +combined = combined[["uprn", "SAP Score", "current_sap_points", "walls", "wall_combined"]] + +combined[combined["SAP Score"] < 69]["current_epc_rating"].value_counts() +combined[combined["SAP Score"] < 69]["SAP Band"].value_counts() +combined[combined["SAP Score"] < 69].shape +combined[combined["current_sap_points"] < 69] + +combined["SAP Band"].value_counts() + +# Our Cs +combined_cs = combined[combined["SAP Score"] < 69] +combined_cs["SAP Band"].value_counts() +# Their C and below + + +compare = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69] + +packages = recommendations_epc_c[recommendations_epc_c["total_retrofit_cost"] > 0] +packages["current_epc_rating"].value_counts() + +# TODO: 612 units +23219 - 612 +errors = recommendations_epc_c[ + (recommendations_epc_c["current_sap_points"] >= 69) & + (recommendations_epc_c["total_retrofit_cost"] > 0) + ] +errors["total_retrofit_cost"].sum() + +below_epc_c = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69] + +below_epc_c_compare = below_epc_c.merge( + df, + left_on="uprn", + right_on="UPRN", + suffixes=("_rec", "_sal") +) + +eg1 = below_epc_c_compare[below_epc_c_compare["SAP Band"] == "C"].copy() 
+eg1["wall_combined"].value_counts() + +eg1_counts = eg1.groupby(["walls", "wall_combined"]).size().reset_index(name="count") +eg1_counts = eg1_counts.sort_values("count", ascending=False) + +externally_insulated = eg1[ + (eg1["wall_combined"] == "Solid Brick+External") & + pd.isnull(eg1["internal_wall_insulation"]) + ] + +externally_insulated[externally_insulated.index == 823]["uprn"] + +recommendations_epc_c[ + (recommendations_epc_c["current_sap_points"] < 69) & + (recommendations_epc_c["current_sap_points"] > 68) + ].shape + +recommendations_epc_c[recommendations_epc_c["wall_combined"] == ""] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py new file mode 100644 index 00000000..5200c34d --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -0,0 +1,236 @@ +import pandas as pd + +epc_c_recommendations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) +epc_b_recommendations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) + +epc_c_movers = epc_b_recommendations[ + epc_b_recommendations["current_epc_rating"] == "Epc.C" + ] +epc_c_movers["property_type"].value_counts() + +house_epc_c_movers = epc_c_movers[ + epc_c_movers["property_type"] == "House" + ] +house_epc_c_movers_with_solar = house_epc_c_movers[ + ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"]) + ] + +house_epc_c_movers_with_a_heatpump = house_epc_c_movers[ + ~pd.isnull(house_epc_c_movers["air_source_heat_pump"]) +] + +flat_epc_c_movers = epc_c_movers[ + epc_c_movers["property_type"] == "Flat" + ] + +epc_c_recommendations["sap_points"].mean() 
+epc_c_recommendations["sap_points"].mean() + +measure_cols = [ + "air_source_heat_pump", + "boiler_upgrade", + "cavity_wall_insulation", + "double_glazing", + "external_wall_insulation", + "flat_roof_insulation", + "high_heat_retention_storage_heaters", + "internal_wall_insulation", + "loft_insulation", + "low_energy_lighting", + "mechanical_ventilation", + "room_roof_insulation", + "roomstat_programmer_trvs", + "sealing_open_fireplace", + "secondary_glazing", + "secondary_heating", + "solar_pv", + "solar_pv_with_battery", + "suspended_floor_insulation", + "time_temperature_zone_control", +] + +epc_c_melted = ( + epc_c_recommendations + .melt( + id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols], + value_vars=measure_cols, + var_name="measure_type", + value_name="value", + ) + .dropna(subset=["value"]) +) +epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0] +epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() + +epc_b_melted = ( + epc_b_recommendations + .melt( + id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols], + value_vars=measure_cols, + var_name="measure_type", + value_name="value", + ) + .dropna(subset=["value"]) +) + +epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0] +epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() + +measures_compared = epc_c_measures.merge( + epc_b_measures, + left_on="measure_type", + right_on="measure_type", + suffixes=("_epc_c", "_epc_b"), +) + +epc_c_retrofits = epc_c_recommendations[ + epc_c_recommendations["total_retrofit_cost"] > 0 + ] + +epc_b_retrofits = epc_b_recommendations[ + epc_b_recommendations["total_retrofit_cost"] > 0 + ] + +epc_c_retrofits["sap_points"].mean() +epc_b_retrofits["sap_points"].mean() + +properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b")) + +properties_in_both["total_retrofit_cost_epc_c"].mean() 
+properties_in_both["sap_points_epc_c"].mean() +properties_in_both["total_retrofit_cost_epc_b"].mean() +properties_in_both["sap_points_epc_b"].mean() + +# Solar PV savings - we need the amount of solar PV bill savings +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from collections import defaultdict + +PORTFOLIO_ID = 434 # Peabody +SCENARIOS = [ + 904, + 905 +] +scenario_names = { + 904: "EPC C - no solid floor, ashp 3.0", + 905: "EPC B - no solid floor, ashp 3.0", +} + + +def get_data(portfolio_id, scenario_ids): + session = sessionmaker(bind=db_engine)() + session.begin() + + # -------------------- + # Properties + # -------------------- + properties_query = session.query( + PropertyModel, + PropertyDetailsEpcModel + ).join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id + ).filter( + PropertyModel.portfolio_id == portfolio_id + ).all() + + properties_data = [ + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query + ] + + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() + + plans_data = [ + {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + for plan in plans_query + ] + + plan_ids = [p["id"] for p in plans_data] + + # -------------------- + # Recommendations (NO materials yet) + # -------------------- + recommendations_query = session.query( + Recommendation, + Plan.scenario_id + ).join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id + 
).join( + Plan, + Plan.id == PlanRecommendations.plan_id + ).filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False) + ).all() + + recommendations_data = [ + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query + ] + + recommendation_ids = [r["id"] for r in recommendations_data] + + # -------------------- + # Recommendation materials (SEPARATE QUERY) + # -------------------- + materials_query = session.query( + RecommendationMaterials + ).filter( + RecommendationMaterials.recommendation_id.in_(recommendation_ids) + ).all() + + # Group materials by recommendation_id + materials_by_recommendation = defaultdict(list) + + for m in materials_query: + materials_by_recommendation[m.recommendation_id].append({ + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + }) + + # Attach materials safely (no filtering side effects) + for r in recommendations_data: + r["materials"] = materials_by_recommendation.get(r["id"], []) + + session.close() + + return properties_data, plans_data, recommendations_data + + +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS +) + +recommendations_df = pd.DataFrame(recommendations_data) + +solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] +average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 89c29ce4..36efb603 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -14,22 +14,14 @@ from collections import defaultdict # 
PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 419 # Peabody +PORTFOLIO_ID = 434 # Peabody SCENARIOS = [ - 871, # EPC C - fabric first, no solid floor, ashp 3.0 - 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 862, # EPC B - No solid floor, ASHP COP 3.0 - 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 859, # EPC C - no solid floor, ashp 3.0 - 885, # EPC B - fabric first, no solid floor, ashp 3.0 + 904, + 905 ] scenario_names = { - 871: "EPC C, fabric first, no solid floor, ashp 3.0", - 863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP", - 862: "EPC B, No solid floor, ASHP COP 3.0", - 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", - 859: "EPC C, no solid floor, ashp 3.0", - 885: "EPC B, fabric first, no solid floor, ashp 3.0" + 904: "EPC C - no solid floor, ashp 3.0", + 905: "EPC B - no solid floor, ashp 3.0", } @@ -88,7 +80,8 @@ def get_data(portfolio_id, scenario_ids): Plan.id == PlanRecommendations.plan_id ).filter( PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True) + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False) ).all() recommendations_data = [ @@ -220,9 +213,7 @@ for scenario_id in SCENARIOS: df = properties_df[ [ "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", - "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", + "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", ] ].merge( recommendations_measures_pivot, how="left", on="property_id" @@ -240,7 +231,7 @@ for scenario_id in SCENARIOS: # Create excel to store to filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/{scenario_names[scenario_id]}.xlsx") + f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx") with pd.ExcelWriter(filename) as writer: df.to_excel(writer, 
sheet_name="properties", index=False) From f44d58c08ee6015b280470ea5663806b7004a3bc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Jan 2026 14:10:28 +0000 Subject: [PATCH 175/202] added new ecr and predictions bucket --- infrastructure/terraform/main.tf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index c3a585f7..5a67b793 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -177,6 +177,12 @@ module "retrofit_hotwater_kwh_predictions" { allowed_origins = var.allowed_origins } +module "retrofit_sap_baseline_predictions" { + source = "./modules/s3" + bucketname = "retrofit-sap-baseline-predictions-${var.stage}" + allowed_origins = var.allowed_origins +} + // We make this bucket presignable, because we want to generate download links for the frontend module "retrofit_energy_assessments" { source = "./modules/s3_presignable_bucket" @@ -253,6 +259,12 @@ module "lambda_hotwater_kwh_prediction_ecr" { source = "./modules/ecr" } +# Baselining models +module "sap_baseline_ecr" { + ecr_name = "sap-baseline-prediction-${var.stage}" + source = "./modules/ecr" +} + ############################################## # CDN - Cloudfront ############################################## From 6436518da95b3c4f173f93f13e19d36cedbe977d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Jan 2026 17:32:53 +0000 Subject: [PATCH 176/202] Added change to pass an already installed sap value, which rebaselines the property, to prevent optimisation --- backend/engine/engine.py | 31 +++++++------ .../k_deck_stats.py | 43 +++++++++++++++++++ .../optimiser/optimiser_functions.py | 15 +++++-- .../tests/test_optimiser_functions.py | 16 +++++++ 4 files changed, 88 insertions(+), 17 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 4a503a08..18b93ec7 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ 
-1065,21 +1065,8 @@ async def model_engine(body: PlanTriggerRequest): ) continue - fixed_gain = optimiser_functions.calculate_fixed_gain( - property_required_measures, recommendations, p, needs_ventilation - ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) - - # We insert the innovation uplift - measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) - - for group in measures_to_optimise_with_uplift: - for r in group: - (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"]) = (0, 0, 0, 0) - already_installed_measures = [] - for measures in measures_to_optimise_with_uplift: + for measures in measures_to_optimise: for m in measures: # A) We're going to make the already installed measures default # B) We need to SAP points for all already installed measures to avoid double counting @@ -1096,6 +1083,22 @@ async def model_engine(body: PlanTriggerRequest): default_already_installed = keep_max_sap_per_measure_type(already_installed_measures) already_installed_sap = float(sum(d["sap_points"] for d in default_already_installed)) + fixed_gain = optimiser_functions.calculate_fixed_gain( + property_required_measures, recommendations, p, needs_ventilation + ) + gain = optimiser_functions.calculate_gain( + body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages, + already_installed_gain=already_installed_sap + ) + + # We insert the innovation uplift + measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) + + for group in measures_to_optimise_with_uplift: + for r in group: + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = (0, 0, 0, 0) + # Remove them from the optimisation pool finalised_measures_to_optimise = [] for m in measures_to_optimise_with_uplift: diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 
Consulting Project/k_deck_stats.py index 5200c34d..c641da06 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -231,6 +231,49 @@ properties_data, plans_data, recommendations_data = get_data( ) recommendations_df = pd.DataFrame(recommendations_data) +properties_df = pd.DataFrame(properties_data) solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() + +# Check tenures +initial_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +sustainability_sample = sustainability_data[ + sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values) +] + +sustainability_sample = sustainability_sample.merge( + initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset") +) + +block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False) + +initial_asset_data.columns +initial_asset_data["LeaseType"].value_counts() + +# sustainability_sample["Tenure Group"].value_counts() +# Tenure Group +# General Needs 57787 +# Home Ownership 25471 +# Care & Supported Housing 4239 +# Rental 2677 +# Other 188 + +df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index() 
+df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False) + +tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index() +tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False) + +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts() diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 0eec35dc..ca48d26d 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -202,8 +202,13 @@ def calculate_fixed_gain(property_required_measures, recommendations, p, needs_v return fixed_gain -def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, - eco_packages: None | dict = None) -> float | None: +def calculate_gain( + body: PlanTriggerRequest, + p: Property, + fixed_gain: float, + eco_packages: None | dict = None, + already_installed_gain: float = 0, +) -> float | None: """ Calculates the target gain value for optimisation based on the goal. @@ -221,6 +226,7 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, fixed_gain : float Total fixed gain from required measures (returned by calculate_fixed_gain). eco_packages : dict, optional + already_installed_gain: float, optional Returns ------- @@ -228,13 +234,16 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, Required SAP gain for EPC, or None for non-EPC goals. 
""" if body.goal == "Increasing EPC": - current_sap = int(p.data["current-energy-efficiency"]) + current_sap = int(p.data["current-energy-efficiency"]) + already_installed_gain target_sap = ( eco_packages.get(p.id)[1] if eco_packages.get(p.id)[1] is not None else epc_to_sap_lower_bound(body.goal_value) ) + if target_sap == current_sap: + return 0 + gain = CostOptimiser.calculate_sap_gain_with_slack( target_sap - current_sap ) - fixed_gain diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 865e3398..ea0b5d94 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -85,6 +85,22 @@ class TestCalculateGain: gain = optimiser_functions.calculate_gain(body, prop, fixed_gain=0) assert gain is None + def test_returns_zero_for_already_installed_getting_to_target(self): + body = SimpleNamespace(goal="Increasing EPC", goal_value="C") + p = SimpleNamespace(data={"current-energy-efficiency": "67"}, id=1) + fixed_gain = 0 + eco_packages = {1: (None, None, None, [])} + already_installed_sap = 2 + gain = optimiser_functions.calculate_gain( + body=body, + p=p, + fixed_gain=fixed_gain, + eco_packages=eco_packages, + already_installed_gain=already_installed_sap + ) + + assert gain == 0 + def test_calculates_gain_for_epc(self, monkeypatch): # patch cost optimiser calculation monkeypatch.setattr(optimiser_functions, "epc_to_sap_lower_bound", lambda goal_value: 69) From 1ef909e87a5c2fb1ef3dd3ee84724b703cb89d53 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 13 Jan 2026 19:22:40 +0000 Subject: [PATCH 177/202] upgrade database instance power --- infrastructure/terraform/main.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 5a67b793..94f29261 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -87,6 +87,8 @@ resource 
"aws_db_instance" "default" { apply_immediately = true # Set up storage type to gp3 for better performance storage_type = "gp3" + # Instance size + instance_class = "db.t4g.medium" } # Set up the bucket that recieve the csv uploads of epc to be retrofit From c07b012ebb00921f62026733f6ce6c4502fc9488 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 13 Jan 2026 19:29:53 +0000 Subject: [PATCH 178/202] upgrading db --- infrastructure/terraform/dev.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/dev.tfvars b/infrastructure/terraform/dev.tfvars index dc84a01f..92b7e158 100644 --- a/infrastructure/terraform/dev.tfvars +++ b/infrastructure/terraform/dev.tfvars @@ -9,7 +9,7 @@ api_url_prefix = "api" # Database allocated_storage = 20 -instance_class = "db.t3.micro" +instance_class = "db.t4g.medium" database_name = "DevAssessmentModelDB" # S3 From 15725a1d13a7551191e6732104a3340ffb745c60 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 13 Jan 2026 19:32:53 +0000 Subject: [PATCH 179/202] fix missing file --- .../c_finalised_modelling_data.py | 29 -- .../k_deck_stats.py | 182 +++++++++++ .../l_reduced_sample.py | 115 +++++++ .../m_reduced_sample_revised.py | 293 ++++++++++++++++++ infrastructure/terraform/main.tf | 2 - sfr/principal_pitch/2_export_data.py | 14 +- 6 files changed, 598 insertions(+), 37 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py index b2dfb01e..3f56d82d 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py @@ -1,34 +1,5 @@ import pandas 
as pd -# import pandas as pd -# -# sal = pd.read_excel( -# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " -# "Project/data_validation/to_standardise_uprns - Standardised.xlsx", -# sheet_name="Standardised Asset List" -# ) -# -# # Quick breadown of missingness -# missing = sal[ -# pd.isnull(sal["estimated"]) | (sal["estimated"] == True) | pd.isnull(sal["epc_os_uprn"]) -# ] -# -# fetched = sal[(sal["estimated"] == False) | ~pd.isnull(sal["epc_os_uprn"])].copy() -# fetched = fetched[ -# ["landlord_property_id", "domna_address_1", "domna_postcode", "domna_full_address", "epc_address1", -# "epc_postcode", "epc_address", "landlord_property_type", "epc_property_type"] -# ] -# -# known_issues = [ -# -# ] -# -# # Missed postcodes -# missed_postcode_agg = missing.groupby("domna_postcode").size().reset_index(name="count") -# missed_postcode_agg = missed_postcode_agg.sort_values("count", ascending=False) -# -# multi_missed_postcode = missed_postcode_agg[missed_postcode_agg["count"] > 1] - ### Prepare sustainability_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index c641da06..179c0878 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -277,3 +277,185 @@ tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame(). 
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False) initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts() + +sample_data = initial_asset_data[ + ~initial_asset_data["Ownership Type"].isin( + [ + # Commercial # Everything is resi - based on the Residential Indicator variable - all are true + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties + # Extra categories which seem sensible to exclude + "NOT MANAGED AND NOT OWNED" + ] + ) +] + +sample_data["Ownership Type"].value_counts() + +sample_data = initial_asset_data[ + initial_asset_data["Ownership Type"].isin( + [ + "Owned and Managed", + "Owned and Managed - 999 year lease", + "Owned and managed LEASEHOLD", + "LEASEHOLD 100%", + "DATALOAD DEFAULT" + ] + ) +] +dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)] +dropped["Ownership Type"].value_counts() + +for value in [ + # Commercial # Everything is resi, so should be fine. 
No matches + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties +]: + print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0]) + +house_types = [ + "HOUSE", + "BUNGALOW", + "MAISONETTE", + "DUPLEX", +] + +guaranteed_control = [ + "Owned and Managed", + "Owned and Managed - 999 year lease", + "Owned and managed LEASEHOLD", + "LEASEHOLD 100%", + "DATALOAD DEFAULT", +] + +sample_data = initial_asset_data[ + ( + initial_asset_data["Ownership Type"].isin(guaranteed_control) + ) + | + ( + (initial_asset_data["Ownership Type"] == "FREEHOLDER") + & + (initial_asset_data["Property Type"].isin(house_types)) + ) + ] + +fabric_retrofit_sample = initial_asset_data[ + initial_asset_data["Ownership Type"].isin( + [ + "Owned and Managed", + "FREEHOLDER", + "DATALOAD DEFAULT", + ] + ) +] + +initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() + +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts() +z = initial_asset_data[ + ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types) + ] + +block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"] + +potential_sample = initial_asset_data[ + ~pd.isnull(initial_asset_data["BlockCode"]) +] + +compare = potential_sample["Property 
Type"].value_counts(normalize=True).to_frame().reset_index().merge( + initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Property Type", + right_on="Property Type", + suffixes=("_on_block_codes", "_overall") +) + +# Comparison of smaller sample vs overall +new_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +new_sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Sustainability" +) + +sap_bands = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx", +) + +combined = new_asset_data.merge( + new_sustainability_data, + left_on="UPRN", + right_on="Org Ref", + suffixes=("_asset", "_sustainability") +).merge( + sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef" +) +reduced_sample = combined[ + ~combined["AH Tenure"].isin( + ["Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"] + ) +].copy() + +# property types +property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( + combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Property Type", + right_on="Property Type", + suffixes=("_reduced_sample", "_overall") +) + +# lodged ratings +lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts( + normalize=True).to_frame().reset_index().merge( + combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(), + 
left_on="Lodged EPC Band", + right_on="Lodged EPC Band", + suffixes=("_reduced_sample", "_overall") +) + +# modelled ratings +modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts( + normalize=True).to_frame().reset_index().merge( + combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(), + left_on="SAP Band", + right_on="SAP Band", + suffixes=("_reduced_sample", "_overall") +) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py b/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py new file mode 100644 index 00000000..cbc52447 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py @@ -0,0 +1,115 @@ +import pandas as pd + +initial_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +asset_data_v2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +desired_ownerships = asset_data_v2[ + ~asset_data_v2["AH Tenure"].isin( + {"Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"} + ) +] + +desired_ownerships["Ownership Type"].value_counts() + +removed_ownerships = initial_asset_data[ + ~initial_asset_data["UPRN"].isin(desired_ownerships["UPRN"].values) +]["Ownership Type"].value_counts() + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + 
"ownership filtered sal.xlsx", + sheet_name="Standardised Asset List" +) + +# What did we include, that we shouldn't have? +should_have_been_dropped = sal[ + ~sal["landlord_property_id"].isin(desired_ownerships["UPRN"].values) +] + +needs_to_be_added = desired_ownerships[ + ~desired_ownerships["UPRN"].isin(sal["landlord_property_id"].values) +] + +# Merge on ownership types +sal = sal.merge( + initial_asset_data[["UPRN", "Ownership Type"]], + left_on="domna_property_id", + right_on="UPRN", +) + +# Remove the irrelevant ownership types +sal = sal[ + ~sal["Ownership Type"].isin( + [ + # Commercial # Everything is resi - based on the Residential Indicator variable - all are true + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties + # Extra categories which seem sensible to exclude + "NOT MANAGED AND NOT OWNED" + ] + ) +] + +sal["landlord_property_id"] = sal["domna_property_id"].copy() + +# Store this SAL in three batches +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx" +) +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Store the three sections + sal[0:20000].to_excel(writer, sheet_name="Batch 1", index=False) + sal[20000:40000].to_excel(writer, sheet_name="Batch 2", index=False) + sal[40000:].to_excel(writer, sheet_name="Batch 3", index=False) + +# Test reading back in and assembling +# b1 = pd.read_excel( +# filename, +# sheet_name="Batch 1" +# ) +# b2 
= pd.read_excel( +# filename, +# sheet_name="Batch 2" +# ) +# b3 = pd.read_excel( +# filename, +# sheet_name="Batch 3" +# ) +# assembled_sal = pd.concat([b1, b2, b3]) +# # Make sure we have the right # of UPRNs +# assert assembled_sal["epc_os_uprn"].nunique() == sal["epc_os_uprn"].nunique() diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py new file mode 100644 index 00000000..a18dc315 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -0,0 +1,293 @@ +# ------ Pull in the full SAL sample ------ +import pandas as pd + +full_sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", + sheet_name="Standardised Asset List" +) + +# ------Pull in the reduced sample ------ +# This has a slightly incorrect mix of ownership types. 
Some properties will need to be dropped and others, added +reduced_sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx", + sheet_name="Standardised Asset List" +) + +# ------ Pull in the confirmed ownership column from Peabody ------ +new_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +correct_sample = new_asset_data[ + ~new_asset_data["AH Tenure"].isin( + ["Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"] + ) +].copy() + +# ------- Stuff to add ------- +# These are properties that need to be added to the reduced sample, from the SAL +stuff_to_add = correct_sample[ + ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) +]["UPRN"].values + +sal_to_add = full_sal[ + full_sal["domna_property_id"].isin(stuff_to_add) +].copy() + +# ------- Stuff to remove ------- +stuff_to_remove = reduced_sal[ + ~reduced_sal["landlord_property_id"].isin(correct_sample["UPRN"].values) +]["landlord_property_id"].values + +to_delete = reduced_sal[ + reduced_sal["landlord_property_id"].isin(stuff_to_remove) +].copy() + +# ------- Create the correctly formatted SAL, with an individual batch for properties we need to add ------- + +# This is what is correct, from the reduced sample, after removing the incorrect ownership types +reduced_sal_final = reduced_sal[ + ~reduced_sal["landlord_property_id"].isin(stuff_to_remove) +].copy() + +sal_to_add["landlord_property_id"] = sal_to_add["domna_property_id"].copy() + +full_sal = pd.concat( + [reduced_sal_final, sal_to_add], +) + +# filename = ( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final 
SAL/20260113 - " +# "final asset list.xlsx" +# ) +# with pd.ExcelWriter(filename) as writer: +# full_sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) +# # Store the three sections +# reduced_sal_final[0:25000].to_excel(writer, sheet_name="Batch 1 - was correct", index=False) +# reduced_sal_final[25000:].to_excel(writer, sheet_name="Batch 2 - was correct", index=False) +# sal_to_add.to_excel(writer, sheet_name="Batch 3 - needs adding", index=False) + +# We now prepare the process of getting the associated +# We have the properties we need to delete. We can get their associated plans for all scenario IDs +scenario_ids = [908, 909, 910] + +import pandas as pd +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session, db_read_session +from sqlalchemy import select, func +from sqlalchemy.orm import Session +from backend.app.db.models.recommendations import Plan + +uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() + +# PORTFOLIO_ID = 435 + +# SCENARIO_ID_WITH_PLANS_TO_DELETE = 910 + + +# Get the property IDs for these UPRNs +# def get_property_ids_for_uprns(session: Session, uprns: list[int], portfolio_id) -> list[int]: +# return [ +# property_id +# for (property_id,) in +# session.query(PropertyModel.id) +# .filter( +# PropertyModel.uprn.in_(uprns), +# PropertyModel.portfolio_id == portfolio_id +# ) +# .all() +# ] +# +# +# with db_read_session() as session: +# property_ids_to_delete = get_property_ids_for_uprns( +# session, uprns_to_be_deleted, portfolio_id=PORTFOLIO_ID +# ) +# +# +# def count_plans_for_scenario(session: Session, scenario_id: int, portfolio_id, property_ids) -> int: +# return session.execute( +# select(func.count()) +# .select_from(Plan) +# .where( +# Plan.scenario_id == scenario_id, +# Plan.portfolio_id == portfolio_id, +# Plan.property_id.in_(property_ids) +# ) +# ).scalar_one() +# +# +# with db_session() as session: +# n_plans = 
count_plans_for_scenario( +# session, +# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE, +# portfolio_id=PORTFOLIO_ID, +# property_ids=property_ids_to_delete +# ) +# +# +# def get_plan_ids_for_scenario( +# session: Session, scenario_id: int, portfolio_id, property_ids +# ) -> list[int]: +# result = session.execute( +# select(Plan.id, Plan.property_id) +# .where( +# Plan.scenario_id == scenario_id, +# Plan.portfolio_id == portfolio_id, +# Plan.property_id.in_(property_ids) +# ) +# ) +# return [{"plan_id": row.id, "property_id": row.property_id} for row in result] +# +# +# with db_session() as session: +# plan_ids_to_property = get_plan_ids_for_scenario( +# session, +# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE, +# portfolio_id=PORTFOLIO_ID, +# property_ids=property_ids_to_delete +# ) +# +# df = pd.DataFrame(plan_ids_to_property) +# df[df["property_id"].duplicated()].shape +# +# plan_ids = [row["plan_id"] for row in plan_ids_to_property] +# +# +# def chunked(iterable, size): +# for i in range(0, len(iterable), size): +# yield iterable[i:i + size] +# +# +# from sqlalchemy import text +# from sqlalchemy.orm import Session +# +# +# def delete_plan_batch(session: Session, plan_ids: list[int]): +# if not plan_ids: +# return +# +# session.execute(text("SET LOCAL lock_timeout = '5s'")) +# +# params = {"plan_ids": plan_ids} +# +# # ---------------------------- +# # recommendation_materials +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM recommendation_materials rm +# USING plan_recommendations pr +# WHERE rm.recommendation_id = pr.recommendation_id +# AND pr.plan_id = ANY(:plan_ids) +# """), +# params, +# ) +# +# # ---------------------------- +# # plan_recommendations +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM plan_recommendations +# WHERE plan_id = ANY(:plan_ids) +# """), +# params, +# ) +# +# # ---------------------------- +# # recommendations (only those used by these plans) +# # 
---------------------------- +# session.execute( +# text(""" +# DELETE FROM recommendation r +# WHERE r.id IN ( +# SELECT DISTINCT recommendation_id +# FROM plan_recommendations +# WHERE plan_id = ANY(:plan_ids) +# ) +# """), +# params, +# ) +# +# # ---------------------------- +# # plans LAST +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM plan +# WHERE id = ANY(:plan_ids) +# """), +# params, +# ) +# +# +# batch_size = 25 +# total = (len(plan_ids) + batch_size - 1) // batch_size +# +# for i, batch in enumerate(chunked(plan_ids, batch_size), start=1): +# print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") +# +# with db_session() as session: +# delete_plan_batch(session, batch) +# +# print(f"Batch {i} committed") +# +# # Now, we delete the associated properties in batch and associated objects. It should +# # largely be property, property details +# property_ids_to_delete +# +# from sqlalchemy import text +# from sqlalchemy.orm import Session +# +# +# def move_properties_between_portfolios( +# session: Session, +# property_ids: list[int], +# from_portfolio_id: int, +# to_portfolio_id: int, +# ): +# if not property_ids: +# return 0 +# +# result = session.execute( +# text(""" +# UPDATE property +# SET portfolio_id = :to_portfolio_id +# WHERE portfolio_id = :from_portfolio_id +# AND id = ANY(:property_ids) +# """), +# { +# "property_ids": property_ids, +# "from_portfolio_id": from_portfolio_id, +# "to_portfolio_id": to_portfolio_id, +# }, +# ) +# +# return result.rowcount +# +# +# # Moved? 
+# # 573476, 586011 +# +# property_ids_to_delete2 = [x for x in property_ids_to_delete if x not in [573476, 586011]] +# +# with db_session() as session: +# n_moved = move_properties_between_portfolios( +# session, +# property_ids=property_ids_to_delete2, +# from_portfolio_id=PORTFOLIO_ID, +# to_portfolio_id=32, # Archive portfolio +# ) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 94f29261..5a67b793 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -87,8 +87,6 @@ resource "aws_db_instance" "default" { apply_immediately = true # Set up storage type to gp3 for better performance storage_type = "gp3" - # Instance size - instance_class = "db.t4g.medium" } # Set up the bucket that recieve the csv uploads of epc to be retrofit diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 36efb603..e36a6297 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -14,14 +14,16 @@ from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 434 # Peabody +PORTFOLIO_ID = 435 # Peabody SCENARIOS = [ - 904, - 905 + 908, + 909, + # 910, ] scenario_names = { - 904: "EPC C - no solid floor, ashp 3.0", - 905: "EPC B - no solid floor, ashp 3.0", + 908: "EPC C - no solid floor, ashp 3.0", + 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", + # 910: "EPC B - no solid floor, no EWI, ashp 3.0" } @@ -231,7 +233,7 @@ for scenario_id in SCENARIOS: # Create excel to store to filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx") + f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx") with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) From 6f1159e871b7339ec248c4ad9192bd3564b5e9a8 Mon Sep 17 00:00:00 2001 From: Khalim 
Conn-Kowlessar Date: Wed, 14 Jan 2026 00:01:01 +0000 Subject: [PATCH 180/202] made adjustment to force any already installed properties to be included in a plan --- .../h_reset_estimated_epcs.py | 15 +++++----- .../k_deck_stats.py | 30 +++++++++++++++++++ .../n_fixing_already_installed_bug.py | 0 recommendations/Recommendations.py | 12 ++++++++ sfr/principal_pitch/2_export_data.py | 12 ++++---- 5 files changed, 56 insertions(+), 13 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index d22d0f9e..67ff2c85 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -5,7 +5,7 @@ from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Plan -PORTFOLIO_ID = 433 +PORTFOLIO_ID = 435 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 @@ -49,12 +49,13 @@ sal = sal.drop_duplicates(subset=['epc_os_uprn']) estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() SCENARIOS = [ - 871, # EPC C - fabric first, no solid floor, ashp 3.0 - 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 862, # EPC B - No solid floor, ASHP COP 3.0 - 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 859, # EPC C - no solid floor, ashp 3.0 - 885, # EPC B - fabric first, no solid floor, ashp 3.0 + # 871, # EPC C - fabric first, no solid floor, ashp 3.0 + # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 862, # EPC B - No solid floor, ASHP COP 3.0 + # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 
859, # EPC C - no solid floor, ashp 3.0 + # 885, # EPC B - fabric first, no solid floor, ashp 3.0 + 908, 909, 910 ] # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index 179c0878..cd7fba63 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -459,3 +459,33 @@ modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts( right_on="SAP Band", suffixes=("_reduced_sample", "_overall") ) + +# Testing measures +m1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) +m2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +compare = m1.merge( + m2, + left_on="uprn", + right_on="uprn", + suffixes=("_ewi_iwi", "_no_ewi_iwi") +) + +# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario +only_no_ewi_iwi = compare[ + (compare["total_retrofit_cost_ewi_iwi"] == 0) & + (compare["total_retrofit_cost_no_ewi_iwi"] != 0) + ] + +(m1["total_retrofit_cost"] > 0).sum() +(m2["total_retrofit_cost"] > 0).sum() + +with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0] + +z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py new file mode 100644 index 00000000..e69de29b diff --git a/recommendations/Recommendations.py 
b/recommendations/Recommendations.py index 2466ea4e..ab13134d 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -86,6 +86,18 @@ class Recommendations: inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions] exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions] + + # if we have already installed measures, we need to include them so they get factored into the baseline + # this is something we'll likely need to remove + if self.property_instance.already_installed: + # We make sure that any already installed measures are included + for rec in self.property_instance.already_installed: + if rec not in inclusions_full: + inclusions_full.append(rec) + + # We remove them from the exclusions if they are there + exclusions_full = [e for e in exclusions_full if e not in self.property_instance.already_installed] + # We need to unlist any lists, but we should check if they're lists first inclusions_full = [ item for sublist in inclusions_full for item in (sublist if isinstance(sublist, list) else [sublist]) diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index e36a6297..a3042a56 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -16,14 +16,14 @@ from collections import defaultdict # SCENARIOS = [389] PORTFOLIO_ID = 435 # Peabody SCENARIOS = [ - 908, - 909, - # 910, + # 908, + # 909, + 910, ] scenario_names = { - 908: "EPC C - no solid floor, ashp 3.0", - 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", - # 910: "EPC B - no solid floor, no EWI, ashp 3.0" + # 908: "EPC C - no solid floor, ashp 3.0", + # 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", + 910: "EPC B - no solid floor, no EWI, ashp 3.0" } From 5f2bc596caa0b218c548863f172099ca10ec745d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 14 Jan 2026 01:52:32 +0000 Subject: [PATCH 181/202] don't say 
ventilation is already installed if cwi installed --- .../n_fixing_already_installed_bug.py | 81 +++++++++++++++++++ recommendations/VentilationRecommendations.py | 2 +- .../optimiser/optimiser_functions.py | 3 +- sfr/principal_pitch/2_export_data.py | 8 +- 4 files changed, 88 insertions(+), 6 deletions(-) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py index e69de29b..5a3aad3f 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py @@ -0,0 +1,81 @@ +# 1) Need to get all already installed measures +# 2) get the unique uprns for these properties +# 3) Create a re-fresh SAL for these properties +# 4) re-trigger EPC C w/o EWI/IWI + the EPC B scenario + +from backend.app.db.models.recommendations import InstalledMeasure +from backend.app.db.connection import db_session +from etl.customers.cambridge.surveys import current_epc + +# Get all installed measures from the installedMeasure table +with db_session() as session: + # We need installed measures, where the measure type is ewi or iwi + installed_measures = session.query(InstalledMeasure).filter( + InstalledMeasure.measure_type.in_(["external_wall_insulation", "internal_wall_insulation"]) + ).all() + # Get the uprns + installed_uprns = [x.uprn for x in installed_measures] + +installed_uprns = list(set(installed_uprns)) + +# This is 21425 properties. 
+# We then create a portfolio of properties we need to re-run +import pandas as pd + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - " + "final asset list.xlsx", + sheet_name="Standardised Asset List" +) + +needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)] + +# Store +needing_retry.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/properties_needing_retry_20260115.xlsx", + sheet_name="Standardised Asset List", + index=False +) + +#### Testing +with_ewi = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) +without_ewi = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +comparison = with_ewi.merge( + without_ewi, + left_on="uprn", + right_on="uprn", + suffixes=("_with_ewi", "_without_ewi") +) + +with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0] +with_ewi["current_epc_rating_with_ewi"].value_counts() +with_ewi["current_epc_rating_with_ewi"].value_counts() + +without_ewi = comparison[comparison["total_retrofit_cost_without_ewi"] > 0] +with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0] + +with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"]["uprn"] + +to_fix = with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"] +to_fix = to_fix[["uprn", "address_with_ewi", "postcode_with_ewi", "property_type_with_ewi"]].rename( + columns={ + "address_with_ewi": "address", + "postcode_with_ewi": "postcode", + "property_type_with_ewi": "property_type" + } +).merge( + sal[["epc_os_uprn", "landlord_built_form"]], + left_on="uprn", + right_on="epc_os_uprn", + how="left" 
+).drop(columns=["epc_os_uprn"]) + +to_fix = to_fix.to_dict("records") diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index a8aa0ca3..ed2f50e2 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -39,7 +39,7 @@ class VentilationRecommendations(Definitions): parts = self.mechanical_ventilation_materials.copy() - already_installed = "cavity_wall_insulation" in self.property.already_installed + already_installed = "mechanical_ventilation" in self.property.already_installed # TODO: We now have multiple ventilation options - we default to selecting the cheapest option part = min(parts, key=lambda x: x['total_cost']) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index ca48d26d..a4543dbf 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -241,7 +241,8 @@ def calculate_gain( else epc_to_sap_lower_bound(body.goal_value) ) - if target_sap == current_sap: + if target_sap <= current_sap: + # We've already met or exceeded the target EPC return 0 gain = CostOptimiser.calculate_sap_gain_with_slack( diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index a3042a56..f12eb85d 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -16,13 +16,13 @@ from collections import defaultdict # SCENARIOS = [389] PORTFOLIO_ID = 435 # Peabody SCENARIOS = [ - # 908, - # 909, + 908, + 909, 910, ] scenario_names = { - # 908: "EPC C - no solid floor, ashp 3.0", - # 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", + 908: "EPC C - no solid floor, ashp 3.0", + 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", 910: "EPC B - no solid floor, no EWI, ashp 3.0" } From 8ad2c002fd276f036252cd37dc0b932e4f527d52 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar 
Date: Wed, 14 Jan 2026 02:13:46 +0000 Subject: [PATCH 182/202] removing bug --- backend/engine/engine.py | 2 +- .../n_fixing_already_installed_bug.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 18b93ec7..50ed0772 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1118,7 +1118,7 @@ async def model_engine(body: PlanTriggerRequest): p=p, input_measures=input_measures, budget=body.budget, - target_gain=gain - already_installed_sap, + target_gain=gain, enforce_heat_pump_insulation=True, enforce_fabric_first=body.enforce_fabric_first, already_installed_sap=already_installed_sap, # To be passed to output diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py index 5a3aad3f..4bd11a1b 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py @@ -11,14 +11,13 @@ from etl.customers.cambridge.surveys import current_epc with db_session() as session: # We need installed measures, where the measure type is ewi or iwi installed_measures = session.query(InstalledMeasure).filter( - InstalledMeasure.measure_type.in_(["external_wall_insulation", "internal_wall_insulation"]) + InstalledMeasure.measure_type.in_(["cavity_wall_insulation"]) ).all() # Get the uprns installed_uprns = [x.uprn for x in installed_measures] installed_uprns = list(set(installed_uprns)) -# This is 21425 properties. 
# We then create a portfolio of properties we need to re-run import pandas as pd @@ -33,7 +32,7 @@ needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)] # Store needing_retry.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " - "SAL/properties_needing_retry_20260115.xlsx", + "SAL/properties_needing_retry_20260115 - cavity wall insulation.xlsx", sheet_name="Standardised Asset List", index=False ) From b808f132a858b9de30f27127c960aa97c6cc46a9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 16 Jan 2026 15:49:02 +0000 Subject: [PATCH 183/202] Update devcontainer and include test packages --- .devcontainer/Dockerfile | 20 +++++++++++++------- .devcontainer/requirements.txt | 4 +++- .vscode/settings.json | 2 ++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 56c366f4..ccfb55b6 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,4 +1,5 @@ -FROM python:3.12-bullseye +FROM python:3.11.10-bullseye + ARG USER=vscode ARG DEBIAN_FRONTEND=noninteractive @@ -24,12 +25,17 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \ && chmod 0440 /etc/sudoers.d/${USER} -# 4) Python deps -ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -# Model +# # 4) Python deps - if you want to run assest list +# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 # ADD asset_list/requirements.txt requirements.txt -# FASTAPI backend -ADD .devcontainer/requirements.txt requirements.txt +# RUN pip install -r requirements.txt + +# +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +ADD backend/engine/requirements.txt requirements1.txt +ADD backend/app/requirements/requirements.txt requirements2.txt +ADD .devcontainer/requirements.txt requirements3.txt +RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt RUN pip install -r requirements.txt 
# 5) Workdir @@ -37,4 +43,4 @@ WORKDIR /workspaces/model # 6) Make Python find your package # Add project root to PYTHONPATH for all processes -ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} \ No newline at end of file diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index d8c51f19..3ffebf3e 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -14,4 +14,6 @@ openpyxl==3.1.2 pytz uvicorn[standard] sqlmodel - +# Testing +pytest==9.0.2 +pytest-cov==7.0.0 \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 27782c10..9a9ea9f8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,6 +9,8 @@ "path": "/bin/bash" } }, + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ From b1aca16be035d88dd44dda8564a07d44b841e632 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 16 Jan 2026 17:28:28 +0000 Subject: [PATCH 184/202] Define simple local runner --- .devcontainer/requirements.txt | 3 ++- .gitignore | 2 ++ backend/condition/__init__.py | 0 backend/condition/handler.py | 16 +++++++++++++++ backend/condition/ingestion/processor.py | 6 ++++++ backend/condition/local_runner.py | 25 ++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 backend/condition/__init__.py create mode 100644 backend/condition/handler.py create mode 100644 backend/condition/ingestion/processor.py create mode 100644 backend/condition/local_runner.py diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index 3ffebf3e..300b86b0 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -16,4 +16,5 @@ uvicorn[standard] sqlmodel # Testing pytest==9.0.2 -pytest-cov==7.0.0 \ No newline at end of file +pytest-cov==7.0.0 +ipykernel>=6.25,<7 \ No 
newline at end of file diff --git a/.gitignore b/.gitignore index a6538116..625277a5 100644 --- a/.gitignore +++ b/.gitignore @@ -242,6 +242,8 @@ fabric.properties local_data/* /local_data/* etl/epc/local_data/* +/backend/condition/sample_data/lbwf/* +/backend/condition/sample_data/peadody/* *.DS_Store infrastructure/terraform/.terraform* diff --git a/backend/condition/__init__.py b/backend/condition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/condition/handler.py b/backend/condition/handler.py new file mode 100644 index 00000000..9d26902b --- /dev/null +++ b/backend/condition/handler.py @@ -0,0 +1,16 @@ +from typing import Mapping, Any +from io import BytesIO + +from utils.logger import setup_logger +from ingestion.processor import process_file + + +logger = setup_logger() + +def handler(event: Mapping[str, Any], context: Any) -> None: + # Temporary stub for PoC wiring + dummy_stream = BytesIO(b"") + + source_key = event.get("source_key", "unknown-source") + + process_file(dummy_stream, source_key) \ No newline at end of file diff --git a/backend/condition/ingestion/processor.py b/backend/condition/ingestion/processor.py new file mode 100644 index 00000000..1653f310 --- /dev/null +++ b/backend/condition/ingestion/processor.py @@ -0,0 +1,6 @@ +from typing import BinaryIO, List + +from utils.logger import setup_logger + +def process_file(file_stream: BinaryIO, source_key: str) -> None: + print(f"[processor] Received file: {source_key}") \ No newline at end of file diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py new file mode 100644 index 00000000..f27e04dc --- /dev/null +++ b/backend/condition/local_runner.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from ingestion.processor import process_file + +def main() -> None: + try: + # Works in scripts / debugger / pytest + ROOT_DIR = Path(__file__).resolve().parents[1] + except NameError: + # __file__ is not defined in notebooks + ROOT_DIR = 
Path.cwd() + + path: Path = ROOT_DIR / "condition" / "sample_data" + + lbwf_path: Path = path / "lbwf" / "LBWF - Example Asset Data September 2025.xlsx" # TODO: get this from s3 as part of devcontainer init + + with lbwf_path.open("rb") as f: + process_file( + file_stream=f, + source_key=lbwf_path.as_posix(), + ) + +if __name__ == "__main__": + main() + From e277e270ab9c2a36caf25549a94ea86f6828de30 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 10:13:00 +0000 Subject: [PATCH 185/202] Move processor.py out of ingestion directory --- backend/condition/handler.py | 2 +- backend/condition/local_runner.py | 2 +- backend/condition/{ingestion => }/processor.py | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename backend/condition/{ingestion => }/processor.py (100%) diff --git a/backend/condition/handler.py b/backend/condition/handler.py index 9d26902b..5279b029 100644 --- a/backend/condition/handler.py +++ b/backend/condition/handler.py @@ -2,7 +2,7 @@ from typing import Mapping, Any from io import BytesIO from utils.logger import setup_logger -from ingestion.processor import process_file +from backend.condition.processor import process_file logger = setup_logger() diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py index f27e04dc..28f9b06c 100644 --- a/backend/condition/local_runner.py +++ b/backend/condition/local_runner.py @@ -1,6 +1,6 @@ from pathlib import Path -from ingestion.processor import process_file +from backend.condition.processor import process_file def main() -> None: try: diff --git a/backend/condition/ingestion/processor.py b/backend/condition/processor.py similarity index 100% rename from backend/condition/ingestion/processor.py rename to backend/condition/processor.py From c073a4cb431c60384c68ec9a54f7c503a765a5b4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:08:30 +0000 Subject: [PATCH 186/202] =?UTF-8?q?Parser=20factory=20chooses=20parser=20c?= 
=?UTF-8?q?lass=20based=20on=20filepath=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/factory.py | 4 ++++ backend/condition/parsing/lbwf_parser.py | 8 ++++++++ backend/condition/parsing/parser.py | 8 ++++++++ backend/condition/processor.py | 8 +++++++- .../condition/tests/parsing/test_parsing_factory.py | 12 ++++++++++++ pytest.ini | 2 +- 6 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 backend/condition/parsing/factory.py create mode 100644 backend/condition/parsing/lbwf_parser.py create mode 100644 backend/condition/parsing/parser.py create mode 100644 backend/condition/tests/parsing/test_parsing_factory.py diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py new file mode 100644 index 00000000..55b46253 --- /dev/null +++ b/backend/condition/parsing/factory.py @@ -0,0 +1,4 @@ +from backend.condition.parsing.parser import Parser + +def select_parser(filepath: str) -> Parser: + raise NotImplementedError \ No newline at end of file diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py new file mode 100644 index 00000000..b0c233d3 --- /dev/null +++ b/backend/condition/parsing/lbwf_parser.py @@ -0,0 +1,8 @@ +from typing import BinaryIO, Any + +from backend.condition.parsing.parser import Parser + +class LbwfParser(Parser): + + def parse(self, file_stream: BinaryIO) -> Any: + raise NotImplementedError \ No newline at end of file diff --git a/backend/condition/parsing/parser.py b/backend/condition/parsing/parser.py new file mode 100644 index 00000000..105fda36 --- /dev/null +++ b/backend/condition/parsing/parser.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from typing import BinaryIO, Any + +class Parser(ABC): + + @abstractmethod + def parse(self, file_stream: BinaryIO) -> Any: + pass \ No newline at end of file diff --git a/backend/condition/processor.py 
b/backend/condition/processor.py index 1653f310..82f1b92e 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -3,4 +3,10 @@ from typing import BinaryIO, List from utils.logger import setup_logger def process_file(file_stream: BinaryIO, source_key: str) -> None: - print(f"[processor] Received file: {source_key}") \ No newline at end of file + print(f"[processor] Received file: {source_key}") + + # Instantiation + + + # Orchestration + diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py new file mode 100644 index 00000000..dc2949f0 --- /dev/null +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -0,0 +1,12 @@ +from backend.condition.parsing.factory import select_parser + +def test_selects_lbwf_parser(): + # arrange + file_path_str = "uploads/lbwf/Example Asset Data.xlsx" + expected_class_name = "LbwfParser" + + # act + actual_class_name = select_parser(file_path_str).__class__.__name__ + + # assert + assert expected_class_name == actual_class_name \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index 84c686b1..1422657b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = . 
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests From 4d36fce83d56a193a89b28196814870f91cc2644 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:13:14 +0000 Subject: [PATCH 187/202] =?UTF-8?q?Parser=20factory=20chooses=20parser=20c?= =?UTF-8?q?lass=20based=20on=20filepath=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/factory.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index 55b46253..c2963079 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,4 +1,8 @@ from backend.condition.parsing.parser import Parser +from backend.condition.parsing.lbwf_parser import LbwfParser def select_parser(filepath: str) -> Parser: - raise NotImplementedError \ No newline at end of file + path = filepath.lower() + + if "lbwf" in path: + return LbwfParser() \ No newline at end of file From 9244689e76668f9a499b3e9fef3736199b089f52 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:17:15 +0000 Subject: [PATCH 188/202] =?UTF-8?q?Parser=20factory=20raises=20value=20err?= =?UTF-8?q?or=20on=20unknown=20file=20path=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../condition/tests/parsing/test_parsing_factory.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py index dc2949f0..4e373a12 100644 --- 
a/backend/condition/tests/parsing/test_parsing_factory.py +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -1,3 +1,5 @@ +import pytest + from backend.condition.parsing.factory import select_parser def test_selects_lbwf_parser(): @@ -9,4 +11,12 @@ def test_selects_lbwf_parser(): actual_class_name = select_parser(file_path_str).__class__.__name__ # assert - assert expected_class_name == actual_class_name \ No newline at end of file + assert expected_class_name == actual_class_name + +def test_unknown_filepath_raises_value_error(): + # arrange + file_path_str = "unkown/Example Asset Data.xlsx" + + # act + assert + with pytest.raises(ValueError): + select_parser(file_path_str) \ No newline at end of file From 6fd4b19e886bc962fe4d119590c2b2e4fdd6d6e2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:18:15 +0000 Subject: [PATCH 189/202] =?UTF-8?q?Parser=20factory=20raises=20value=20err?= =?UTF-8?q?or=20on=20unknown=20file=20path=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/factory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index c2963079..7db8383f 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -5,4 +5,6 @@ def select_parser(filepath: str) -> Parser: path = filepath.lower() if "lbwf" in path: - return LbwfParser() \ No newline at end of file + return LbwfParser() + + raise ValueError("Unrecognised file path, unable to instantiate Parser") \ No newline at end of file From 049a93fa26358210c0c12aaecc3f1812077af806 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:47:32 +0000 Subject: [PATCH 190/202] =?UTF-8?q?Create=20FileType=20enum=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/file_type.py | 12 
++++++++++ backend/condition/parsing/factory.py | 9 ++++---- backend/condition/processor.py | 2 +- .../tests/parsing/test_parsing_factory.py | 15 ++++--------- .../condition/tests/test_detect_file_type.py | 22 +++++++++++++++++++ 5 files changed, 43 insertions(+), 17 deletions(-) create mode 100644 backend/condition/file_type.py create mode 100644 backend/condition/tests/test_detect_file_type.py diff --git a/backend/condition/file_type.py b/backend/condition/file_type.py new file mode 100644 index 00000000..b9a4357f --- /dev/null +++ b/backend/condition/file_type.py @@ -0,0 +1,12 @@ +from enum import Enum + +class FileType(Enum): + LBWF = "lbwf" + +def detect_file_type(filepath: str) -> FileType: + path = filepath.lower() + + if "lbwf" in path: + return FileType.LBWF + + raise ValueError("Unrecognised file path") \ No newline at end of file diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index 7db8383f..01dce75d 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,10 +1,9 @@ +from backend.condition.file_type import FileType from backend.condition.parsing.parser import Parser from backend.condition.parsing.lbwf_parser import LbwfParser -def select_parser(filepath: str) -> Parser: - path = filepath.lower() - - if "lbwf" in path: +def select_parser(file_type: FileType) -> Parser: + if file_type is FileType.LBWF: return LbwfParser() - raise ValueError("Unrecognised file path, unable to instantiate Parser") \ No newline at end of file + raise ValueError("Unrecognised file type, unable to instantiate Parser") diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 82f1b92e..c4dcabc2 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,6 +1,7 @@ from typing import BinaryIO, List from utils.logger import setup_logger +from backend.condition.file_type import FileType def process_file(file_stream: BinaryIO, source_key: str) -> 
None: print(f"[processor] Received file: {source_key}") @@ -9,4 +10,3 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None: # Orchestration - diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py index 4e373a12..481418d7 100644 --- a/backend/condition/tests/parsing/test_parsing_factory.py +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -1,22 +1,15 @@ import pytest from backend.condition.parsing.factory import select_parser +from backend.condition.file_type import FileType def test_selects_lbwf_parser(): # arrange - file_path_str = "uploads/lbwf/Example Asset Data.xlsx" + file_type = FileType.LBWF expected_class_name = "LbwfParser" # act - actual_class_name = select_parser(file_path_str).__class__.__name__ + actual_class_name = select_parser(file_type).__class__.__name__ # assert - assert expected_class_name == actual_class_name - -def test_unknown_filepath_raises_value_error(): - # arrange - file_path_str = "unkown/Example Asset Data.xlsx" - - # act + assert - with pytest.raises(ValueError): - select_parser(file_path_str) \ No newline at end of file + assert expected_class_name == actual_class_name \ No newline at end of file diff --git a/backend/condition/tests/test_detect_file_type.py b/backend/condition/tests/test_detect_file_type.py new file mode 100644 index 00000000..fecf22c1 --- /dev/null +++ b/backend/condition/tests/test_detect_file_type.py @@ -0,0 +1,22 @@ +import pytest + +from backend.condition.file_type import FileType, detect_file_type + +def test_detects_lbwf_file_type(): + # arrange + file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx" + expected_file_type = FileType.LBWF + + # act + actual_file_type: FileType = detect_file_type(file_path_str) + + # assert + assert expected_file_type == actual_file_type + +def test_unknown_filepath_raises_value_error(): + # arrange + file_path_str = "unknown/Example Asset Data.xlsx" + + # act + assert + with 
pytest.raises(ValueError): + detect_file_type(file_path_str) \ No newline at end of file From 00a707500ee86e58f1b02c219af50c2a45439174 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 14:40:35 +0000 Subject: [PATCH 191/202] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 1 + .../records/lbwf_property_condition.py | 27 +++++ backend/condition/processor.py | 7 +- .../tests/parsing/test_lbwf_parser.py | 98 +++++++++++++++++++ 4 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 backend/condition/parsing/records/lbwf_property_condition.py create mode 100644 backend/condition/tests/parsing/test_lbwf_parser.py diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index b0c233d3..7404189f 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,6 +1,7 @@ from typing import BinaryIO, Any from backend.condition.parsing.parser import Parser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition class LbwfParser(Parser): diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_property_condition.py new file mode 100644 index 00000000..1ecd00d6 --- /dev/null +++ b/backend/condition/parsing/records/lbwf_property_condition.py @@ -0,0 +1,27 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class LbwfPropertyCondition: + uprn: int + prop_ref: int + domna: int + address: str + ownership: str + prop_status: str + prop_type: str # TODO: make this enum? + prop_sub_type: str # TODO: make this enum? 
+ element_group: str + element_code: str + element_code_description: str + attribute_code: str + attribute_code_description: str + element_date_value: str | None = None + element_numerical_value: int | None = None + element_text_value: str | None = None + quantity: int | None = None + install_date: date | None = None + remaining_life: int | None = None + element_comments: str | None = None + diff --git a/backend/condition/processor.py b/backend/condition/processor.py index c4dcabc2..f19c4257 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,12 +1,15 @@ from typing import BinaryIO, List +from backend.condition.parsing.parser import Parser from utils.logger import setup_logger -from backend.condition.file_type import FileType +from backend.condition.file_type import FileType, detect_file_type +from backend.condition.parsing.factory import select_parser def process_file(file_stream: BinaryIO, source_key: str) -> None: print(f"[processor] Received file: {source_key}") # Instantiation - + file_type: FileType = detect_file_type(source_key) + parser: Parser = select_parser(file_type) # Orchestration diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py new file mode 100644 index 00000000..4c5be5cd --- /dev/null +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -0,0 +1,98 @@ +from typing import Any +import pytest +from io import BytesIO +from openpyxl import Workbook +from datetime import datetime +import debugpy + +from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition + +@pytest.fixture +def lbwf_homes_xlsx_bytes() -> BytesIO: + wb = Workbook() + houses_asset_data = wb.active + houses_asset_data.title = "Houses Asset Data" + houses_asset_data.append([ + "PROP REF", + "Domna", + "ADDRESS", + "OWNERSHIP", + "PROP STATUS", + "PROP TYPE", + "PROP SUB TYPE", + 
"ELEMENT GROUP", + "ELEMENT CODE", + "ELEMENT CODE DESCRIPTION", + "ATTRIBUTE CODE", + "ATTRIBUTE CODE DESCRIPTION", + "ELEMENT DATE VALUE", + "ELEMENT NUMERIC VALUE", + "ELEMENT TEXT VALUE", + "QUANTITY", + "INSTALL DATE", + "REMAINING LIFE", + "ELEMENT COMMENTS" + ] + ) + + houses_asset_data.append([ + 12345, + 12345, + "123 Fake Street", + "LBWF_OWNED", + "OCCP", + "HOU", + "TERRACED", + "ASSETS", + "AHR_CAT", + "Accessible Housing Register Category", + "F", + "General Needs", + None, + None, + None, + 1, + None, + None, + None, + ]) + houses_asset_data.append([ + 54321, + 54321, + "100 Random Road", + "LBWF_OWNED", + "OCCP", + "HOU", + "EOT", + "ASSETS", + "INTSMKDET", + "Smoke Detectors in Property", + "HARDWRDMNS", + "Hard Wired Mains Smoke Alarm in Property", + None, + None, + None, + 2, + datetime(2019,4,1), + 4, + "Source of Data = Joe Bloggs", + ]) + + stream = BytesIO() + wb.save(stream) + stream.seek(0) + + return stream + +def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): + debugpy.wait_for_client() + # arrange + parser = LbwfParser() + + # act + result: Any = parser.parse(lbwf_homes_xlsx_bytes) + + # assert + assert len(result) == 2 + assert isinstance(result[0], LbwfPropertyCondition) \ No newline at end of file From 60f3f0c2227a39a076b5631f5d1a5604ebd0b9ba Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 14:40:35 +0000 Subject: [PATCH 192/202] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 1 + .../records/lbwf_property_condition.py | 27 ++++ backend/condition/processor.py | 7 +- .../tests/parsing/test_lbwf_parser.py | 120 ++++++++++++++++++ 4 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 backend/condition/parsing/records/lbwf_property_condition.py create mode 100644 backend/condition/tests/parsing/test_lbwf_parser.py diff --git 
a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index b0c233d3..7404189f 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,6 +1,7 @@ from typing import BinaryIO, Any from backend.condition.parsing.parser import Parser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition class LbwfParser(Parser): diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_property_condition.py new file mode 100644 index 00000000..1ecd00d6 --- /dev/null +++ b/backend/condition/parsing/records/lbwf_property_condition.py @@ -0,0 +1,27 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class LbwfPropertyCondition: + uprn: int + prop_ref: int + domna: int + address: str + ownership: str + prop_status: str + prop_type: str # TODO: make this enum? + prop_sub_type: str # TODO: make this enum? + element_group: str + element_code: str + element_code_description: str + attribute_code: str + attribute_code_description: str + element_date_value: str | None = None + element_numerical_value: int | None = None + element_text_value: str | None = None + quantity: int | None = None + install_date: date | None = None + remaining_life: int | None = None + element_comments: str | None = None + diff --git a/backend/condition/processor.py b/backend/condition/processor.py index c4dcabc2..f19c4257 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,12 +1,15 @@ from typing import BinaryIO, List +from backend.condition.parsing.parser import Parser from utils.logger import setup_logger -from backend.condition.file_type import FileType +from backend.condition.file_type import FileType, detect_file_type +from backend.condition.parsing.factory import select_parser def process_file(file_stream: BinaryIO, source_key: str) -> None: print(f"[processor] Received file: 
{source_key}") # Instantiation - + file_type: FileType = detect_file_type(source_key) + parser: Parser = select_parser(file_type) # Orchestration diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py new file mode 100644 index 00000000..f7a7d162 --- /dev/null +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -0,0 +1,120 @@ +from typing import Any +import pytest +from io import BytesIO +from openpyxl import Workbook +from datetime import datetime +import debugpy + +from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition + +@pytest.fixture +def lbwf_homes_xlsx_bytes() -> BytesIO: + wb = Workbook() + houses_asset_data = wb.active + houses_asset_data.title = "Houses Asset Data" + houses_asset_data.append([ + "PROP REF", + "Domna", + "ADDRESS", + "OWNERSHIP", + "PROP STATUS", + "PROP TYPE", + "PROP SUB TYPE", + "ELEMENT GROUP", + "ELEMENT CODE", + "ELEMENT CODE DESCRIPTION", + "ATTRIBUTE CODE", + "ATTRIBUTE CODE DESCRIPTION", + "ELEMENT DATE VALUE", + "ELEMENT NUMERIC VALUE", + "ELEMENT TEXT VALUE", + "QUANTITY", + "INSTALL DATE", + "REMAINING LIFE", + "ELEMENT COMMENTS" + ] + ) + houses_asset_data.append([ + 12345, + 12345, + "123 Fake Street, London, A10 1AB", + "LBWF_OWNED", + "OCCP", + "HOU", + "TERRACED", + "ASSETS", + "AHR_CAT", + "Accessible Housing Register Category", + "F", + "General Needs", + None, + None, + None, + 1, + None, + None, + None, + ]) + houses_asset_data.append([ + 54321, + 54321, + "100 Random Road, London, A10 1AB", + "LBWF_OWNED", + "OCCP", + "HOU", + "EOT", + "ASSETS", + "INTSMKDET", + "Smoke Detectors in Property", + "HARDWRDMNS", + "Hard Wired Mains Smoke Alarm in Property", + None, + None, + None, + 2, + datetime(2019,4,1), + 4, + "Source of Data = Joe Bloggs", + ]) + + all_energy_breakdown = wb.create_sheet("All Energy Breakdown") + all_energy_breakdown.append([ + 
"UPRN", + "Organisation Reference", + "Alternate Organisation Reference", + "Address", + "Postcode" + ]) + all_energy_breakdown.append([ + 1, + 100, + 101, + "100 RANDOM ROAD", + "A10 1AB" + ]) + all_energy_breakdown.append([ + 2, + 200, + None, + "123 FAKE STREET", + "A10 1AB" + ]) + + stream = BytesIO() + wb.save(stream) + stream.seek(0) + + return stream + +def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): + debugpy.wait_for_client() + # arrange + parser = LbwfParser() + + # act + result: Any = parser.parse(lbwf_homes_xlsx_bytes) + + # assert + assert len(result) == 2 + assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts \ No newline at end of file From 6ff652ff3492222633e118d27619121aa2a65800 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:23:11 +0000 Subject: [PATCH 193/202] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .devcontainer/devcontainer.json | 3 + .vscode/launch.json | 15 +++ .vscode/settings.json | 1 + backend/condition/parsing/lbwf_parser.py | 96 ++++++++++++++++++- ...y_condition.py => lbwf_asset_condition.py} | 2 +- backend/condition/processor.py | 3 +- .../tests/parsing/test_lbwf_parser.py | 8 +- backend/condition/utils/date_utils.py | 10 ++ 8 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 .vscode/launch.json rename backend/condition/parsing/records/{lbwf_property_condition.py => lbwf_asset_condition.py} (95%) create mode 100644 backend/condition/utils/date_utils.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 91a76c3d..761786cd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -27,5 +27,8 @@ "ms-python.vscode-python-envs" ] } + }, + "containerEnv": { + "PYTHONFLAGS": "-Xfrozen_modules=off" } } diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..6b76b4fa 
--- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 9a9ea9f8..88c2ae2d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ }, "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, + "python.testing.pytestArgs": ["-s", "-q", "--no-cov"] // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 7404189f..fd65e24a 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,9 +1,99 @@ -from typing import BinaryIO, Any +from typing import BinaryIO, Any, Dict, Iterator, List, Tuple +from openpyxl import Workbook, load_workbook +from datetime import date from backend.condition.parsing.parser import Parser -from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition +from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.utils.date_utils import normalise_date +from utils.logger import setup_logger + +logger = setup_logger class LbwfParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: - raise NotImplementedError \ No newline at end of file + wb = load_workbook(file_stream) + urn_to_address_map: Dict[str, int] = LbwfParser._map_uprn_to_address(wb) + print(urn_to_address_map) + + assets_sheet: Workbook = wb["Houses Asset Data"] + rows: Iterator[Tuple[object 
| None, ...]] = assets_sheet.iter_rows(values_only=True) + headers = next(rows) + header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + + assets: List[LbwfAssetCondition] = [] + + for row in rows: + try: + install_date: date = normalise_date(row[header_indexes["INSTALL DATE"]]) + except ValueError as e: + logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}") + continue + + assets.append(LbwfAssetCondition( + uprn=0, #placeholder + prop_ref=row[header_indexes["PROP REF"]], + domna=row[header_indexes["Domna"]], + address=row[header_indexes["ADDRESS"]], + ownership=row[header_indexes["OWNERSHIP"]], + prop_status=row[header_indexes["PROP STATUS"]], + prop_type=row[header_indexes["PROP TYPE"]], + prop_sub_type=row[header_indexes["PROP SUB TYPE"]], + element_group=row[header_indexes["ELEMENT GROUP"]], + element_code=row[header_indexes["ELEMENT CODE"]], + element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], + attribute_code=row[header_indexes["ATTRIBUTE CODE"]], + attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], + element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], + element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], + element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], + quantity=row[header_indexes["QUANTITY"]], + install_date=install_date, + remaining_life=row[header_indexes["REMAINING LIFE"]], + element_comments=row[header_indexes["ELEMENT COMMENTS"]], + )) + + return assets + + + @staticmethod + def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]: + print(wb.sheetnames) + sheet: Workbook = wb["All Energy Breakdown "] + + rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True) + + headers = next(rows) + header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + + address_idx = header_indexes["Address"] + uprn_idx = header_indexes["UPRN"] + + mapping: Dict[str, int | None] 
= {} + + for row in rows: + address = row[address_idx] + uprn = row[uprn_idx] + + if not isinstance(address, str): + continue + + if uprn is not None and not isinstance(uprn, int): + raise ValueError(f"Unexpected UPRN value: {uprn!r}") + + mapping[address] = uprn + + return mapping + + + def _get_column_indexes_by_name( + headers: Tuple[object | None, ...] + ) -> Dict[str, int]: + index: Dict[str, int] = {} + + for i, header in enumerate(headers): + if isinstance(header, str): + index[header] = i + + return index + diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_asset_condition.py similarity index 95% rename from backend/condition/parsing/records/lbwf_property_condition.py rename to backend/condition/parsing/records/lbwf_asset_condition.py index 1ecd00d6..3955350b 100644 --- a/backend/condition/parsing/records/lbwf_property_condition.py +++ b/backend/condition/parsing/records/lbwf_asset_condition.py @@ -3,7 +3,7 @@ from datetime import date @dataclass -class LbwfPropertyCondition: +class LbwfAssetCondition: uprn: int prop_ref: int domna: int diff --git a/backend/condition/processor.py b/backend/condition/processor.py index f19c4257..3939ba08 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,4 +1,4 @@ -from typing import BinaryIO, List +from typing import Any, BinaryIO, List from backend.condition.parsing.parser import Parser from utils.logger import setup_logger @@ -13,3 +13,4 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None: parser: Parser = select_parser(file_type) # Orchestration + records: List[Any] = parser.parse(file_stream) \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index d9ed1e90..6a93979a 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -3,10 +3,9 @@ import 
pytest from io import BytesIO from openpyxl import Workbook from datetime import datetime -import debugpy from backend.condition.parsing.lbwf_parser import LbwfParser -from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition +from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition @pytest.fixture def lbwf_homes_xlsx_bytes() -> BytesIO: @@ -78,7 +77,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: "Source of Data = Joe Bloggs", ]) - all_energy_breakdown = wb.create_sheet("All Energy Breakdown") + all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source all_energy_breakdown.append([ "UPRN", "Organisation Reference", @@ -108,7 +107,6 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: return stream def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): - debugpy.wait_for_client() # arrange parser = LbwfParser() @@ -117,4 +115,4 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert assert len(result) == 2 - assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts + assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py new file mode 100644 index 00000000..4535acd9 --- /dev/null +++ b/backend/condition/utils/date_utils.py @@ -0,0 +1,10 @@ +from datetime import datetime, date +from typing import Any + + +def normalise_date(value: Any, allow_none: bool = True) -> date | None: + if value is None and allow_none: + return None + if isinstance(value, datetime): + return value.date() + raise ValueError(f"Unexpected date value: {value!r}") \ No newline at end of file From 4553e9937bb8ee10dcb61fd86647f48c8695d5cb Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:28:51 +0000 Subject: [PATCH 194/202] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 54 ++++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index fd65e24a..2d6463c9 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -25,36 +25,36 @@ class LbwfParser(Parser): for row in rows: try: - install_date: date = normalise_date(row[header_indexes["INSTALL DATE"]]) - except ValueError as e: - logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}") - continue + assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes)) + except Exception as e: + logger.error(f"Error mapping LBWF row to asset record: {e}") - assets.append(LbwfAssetCondition( - uprn=0, #placeholder - prop_ref=row[header_indexes["PROP REF"]], - domna=row[header_indexes["Domna"]], - address=row[header_indexes["ADDRESS"]], - ownership=row[header_indexes["OWNERSHIP"]], - prop_status=row[header_indexes["PROP STATUS"]], - prop_type=row[header_indexes["PROP TYPE"]], - prop_sub_type=row[header_indexes["PROP SUB TYPE"]], - element_group=row[header_indexes["ELEMENT GROUP"]], - element_code=row[header_indexes["ELEMENT CODE"]], - element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], - attribute_code=row[header_indexes["ATTRIBUTE CODE"]], - attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], - element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], - element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], - element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], - quantity=row[header_indexes["QUANTITY"]], - install_date=install_date, - remaining_life=row[header_indexes["REMAINING LIFE"]], - element_comments=row[header_indexes["ELEMENT COMMENTS"]], - )) - return assets + @staticmethod + def _map_row_to_asset_record(row: Any | Tuple[object | 
None, ...], header_indexes: Dict[str, int]) -> LbwfAssetCondition: + return LbwfAssetCondition( + uprn=0, #placeholder + prop_ref=row[header_indexes["PROP REF"]], + domna=row[header_indexes["Domna"]], + address=row[header_indexes["ADDRESS"]], + ownership=row[header_indexes["OWNERSHIP"]], + prop_status=row[header_indexes["PROP STATUS"]], + prop_type=row[header_indexes["PROP TYPE"]], + prop_sub_type=row[header_indexes["PROP SUB TYPE"]], + element_group=row[header_indexes["ELEMENT GROUP"]], + element_code=row[header_indexes["ELEMENT CODE"]], + element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], + attribute_code=row[header_indexes["ATTRIBUTE CODE"]], + attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], + element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], + element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], + element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], + quantity=row[header_indexes["QUANTITY"]], + install_date=normalise_date(row[header_indexes["INSTALL DATE"]]), + remaining_life=row[header_indexes["REMAINING LIFE"]], + element_comments=row[header_indexes["ELEMENT COMMENTS"]], + ) @staticmethod def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]: From 330580c7750641f1aac7b866791948b8db774537 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:32:16 +0000 Subject: [PATCH 195/202] =?UTF-8?q?map=20correct=20uprn=20when=20parsing?= =?UTF-8?q?=20asset=20conditions=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/tests/parsing/test_lbwf_parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index 6a93979a..e962f229 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -114,5 
+114,8 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): result: Any = parser.parse(lbwf_homes_xlsx_bytes) # assert + # TODO: Improve these asserts assert len(result) == 2 - assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts + assert isinstance(result[0], LbwfAssetCondition) + assert result[0].uprn == 1 + assert result[1].uprn == 2 \ No newline at end of file From 12bbd1a4feb7301a8ea93507c47ddbf75ac1343f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:41:13 +0000 Subject: [PATCH 196/202] handle dates as strings --- backend/condition/utils/date_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py index 4535acd9..713d151c 100644 --- a/backend/condition/utils/date_utils.py +++ b/backend/condition/utils/date_utils.py @@ -5,6 +5,14 @@ from typing import Any def normalise_date(value: Any, allow_none: bool = True) -> date | None: if value is None and allow_none: return None + if isinstance(value, datetime): return value.date() + + if isinstance(value, str): + try: + return datetime.strptime(value.strip(), "%d/%m/%Y").date() + except ValueError as exc: + raise ValueError(f"Invalid date string: {value!r}") from exc + raise ValueError(f"Unexpected date value: {value!r}") \ No newline at end of file From 13daa62bdf9bc25a7538186b8bd047e231a5c75a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:51:18 +0000 Subject: [PATCH 197/202] =?UTF-8?q?map=20correct=20uprn=20when=20parsing?= =?UTF-8?q?=20asset=20conditions=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 29 ++++++++++++++----- .../tests/parsing/test_lbwf_parser.py | 12 ++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 2d6463c9..293311b8 
100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -13,8 +13,7 @@ class LbwfParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: wb = load_workbook(file_stream) - urn_to_address_map: Dict[str, int] = LbwfParser._map_uprn_to_address(wb) - print(urn_to_address_map) + address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb) assets_sheet: Workbook = wb["Houses Asset Data"] rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) @@ -25,19 +24,26 @@ class LbwfParser(Parser): for row in rows: try: - assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes)) + assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes, address_to_uprn_map)) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") + print(assets) return assets @staticmethod - def _map_row_to_asset_record(row: Any | Tuple[object | None, ...], header_indexes: Dict[str, int]) -> LbwfAssetCondition: + def _map_row_to_asset_record( + row: Any | Tuple[object | None, ...], + header_indexes: Dict[str, int], + address_to_uprn_map: Dict[str, int] + ) -> LbwfAssetCondition: + address: str = row[header_indexes["ADDRESS"]] + return LbwfAssetCondition( - uprn=0, #placeholder + uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), prop_ref=row[header_indexes["PROP REF"]], domna=row[header_indexes["Domna"]], - address=row[header_indexes["ADDRESS"]], + address=address, ownership=row[header_indexes["OWNERSHIP"]], prop_status=row[header_indexes["PROP STATUS"]], prop_type=row[header_indexes["PROP TYPE"]], @@ -57,8 +63,7 @@ class LbwfParser(Parser): ) @staticmethod - def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]: - print(wb.sheetnames) + def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]: sheet: Workbook = wb["All Energy Breakdown "] rows: Iterator[Tuple[object | None, ...]] = 
sheet.iter_rows(values_only=True) @@ -96,4 +101,12 @@ class LbwfParser(Parser): index[header] = i return index + + def _get_uprn_from_address(address: str, address_to_uprn_map: Dict[str, int]) -> int | None: + pseudo_name = address.split(",")[0] + + if pseudo_name.lower() in (k.lower() for k in address_to_uprn_map.keys()): + return address_to_uprn_map[pseudo_name.upper()] + + return None diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index e962f229..412c4c17 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -87,16 +87,16 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: ]) all_energy_breakdown.append([ 1, - 100, - 101, - "100 RANDOM ROAD", + 200, + None, + "123 FAKE STREET", "A10 1AB" ]) all_energy_breakdown.append([ 2, - 200, - None, - "123 FAKE STREET", + 100, + 101, + "100 RANDOM ROAD", "A10 1AB" ]) From dce8442fff6d42ff6118a9ac80ec5cc1ba02d716 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 09:34:23 +0000 Subject: [PATCH 198/202] =?UTF-8?q?Parse=20Houses=20sheet=20as=20well=20?= =?UTF-8?q?=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 2 +- .../records/{ => lbwf}/lbwf_asset_condition.py | 0 .../condition/parsing/records/lbwf/lbwf_house.py | 15 +++++++++++++++ .../condition/tests/parsing/test_lbwf_parser.py | 9 +++++++-- 4 files changed, 23 insertions(+), 3 deletions(-) rename backend/condition/parsing/records/{ => lbwf}/lbwf_asset_condition.py (100%) create mode 100644 backend/condition/parsing/records/lbwf/lbwf_house.py diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 293311b8..2cd34ae4 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -3,7 +3,7 @@ from openpyxl import Workbook, load_workbook 
from datetime import date from backend.condition.parsing.parser import Parser -from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition from backend.condition.utils.date_utils import normalise_date from utils.logger import setup_logger diff --git a/backend/condition/parsing/records/lbwf_asset_condition.py b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py similarity index 100% rename from backend/condition/parsing/records/lbwf_asset_condition.py rename to backend/condition/parsing/records/lbwf/lbwf_asset_condition.py diff --git a/backend/condition/parsing/records/lbwf/lbwf_house.py b/backend/condition/parsing/records/lbwf/lbwf_house.py new file mode 100644 index 00000000..6db16862 --- /dev/null +++ b/backend/condition/parsing/records/lbwf/lbwf_house.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass +from typing import List + +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition + +@dataclass +class LbwfHouse: + uprn: int + reference: int + address: str + epc: str # TODO: make enum + shdf: bool + house: str + fail_decency: int + assets: List[LbwfAssetCondition] \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index 412c4c17..dfa1403e 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -5,7 +5,7 @@ from openpyxl import Workbook from datetime import datetime from backend.condition.parsing.lbwf_parser import LbwfParser -from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse @pytest.fixture def lbwf_homes_xlsx_bytes() -> BytesIO: @@ -77,6 +77,11 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: "Source of Data = Joe Bloggs", ]) + 
houses = wb.create_sheet("Houses") + houses.append(["Reference", "Address", "EPC", "SHDF", "HOSUE", "Fail Decency"]) + houses.append([12345, "123 Fake Street, London, A10 1AB", "E", "NO", "HOUSE", 2025]) + houses.append([54321, "100 Random Road, London, A10 1AB", "F", "NO", "HOUSE", 2025]) + all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source all_energy_breakdown.append([ "UPRN", @@ -116,6 +121,6 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert # TODO: Improve these asserts assert len(result) == 2 - assert isinstance(result[0], LbwfAssetCondition) + assert isinstance(result[0], LbwfHouse) assert result[0].uprn == 1 assert result[1].uprn == 2 \ No newline at end of file From e11f59a7324e78fe16b72eca76ceedde1ac29e4c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 09:53:01 +0000 Subject: [PATCH 199/202] =?UTF-8?q?Parse=20Houses=20sheet=20as=20well=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 65 +++++++++++++++---- .../records/lbwf/lbwf_asset_condition.py | 1 - .../tests/parsing/test_lbwf_parser.py | 10 ++- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 2cd34ae4..3c2fbf93 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,9 +1,11 @@ from typing import BinaryIO, Any, Dict, Iterator, List, Tuple from openpyxl import Workbook, load_workbook from datetime import date +from collections import defaultdict from backend.condition.parsing.parser import Parser from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse from backend.condition.utils.date_utils import normalise_date from utils.logger 
import setup_logger @@ -15,35 +17,71 @@ class LbwfParser(Parser): wb = load_workbook(file_stream) address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb) + # Parse assets assets_sheet: Workbook = wb["Houses Asset Data"] - rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) - headers = next(rows) - header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + asset_rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) + asset_headers = next(asset_rows) + asset_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(asset_headers) assets: List[LbwfAssetCondition] = [] - - for row in rows: + for row in asset_rows: try: - assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes, address_to_uprn_map)) + assets.append(LbwfParser._map_row_to_asset_record(row, asset_header_indexes)) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") - print(assets) - return assets + # Parse houses + houses_sheet: Workbook = wb["Houses"] + house_rows: Iterator[Tuple[object | None, ...]] = houses_sheet.iter_rows(values_only=True) + house_headers = next(house_rows) + house_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(house_headers) + houses: List[LbwfHouse] = [] + for row in house_rows: + try: + houses.append(LbwfParser._map_row_to_house_record(row, house_header_indexes, address_to_uprn_map)) + except Exception as e: + logger.error(f"Error mapping LBWF row to house record: {e}") + + # Merge assets and houses by Reference + assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list) + for asset in assets: + assets_by_ref[asset.prop_ref].append(asset) + + for house in houses: + house.assets = assets_by_ref.get(house.reference, []) + + return houses + + + @staticmethod + def _map_row_to_house_record( + row: Any | Tuple[object | None, ...], + header_indexes: Dict[str, int], + 
address_to_uprn_map: Dict[str, int], + ) -> LbwfHouse: + address: str = row[header_indexes["Address"]] + + return LbwfHouse( + uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), + reference=row[header_indexes["Reference"]], + address=address, + epc=row[header_indexes["EPC"]], + shdf=row[header_indexes["SHDF"]], + house=row[header_indexes["HOSUE"]], + fail_decency=row[header_indexes["Fail Decency"]], + assets=[], + ) + @staticmethod def _map_row_to_asset_record( row: Any | Tuple[object | None, ...], header_indexes: Dict[str, int], - address_to_uprn_map: Dict[str, int] ) -> LbwfAssetCondition: - address: str = row[header_indexes["ADDRESS"]] - return LbwfAssetCondition( - uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), prop_ref=row[header_indexes["PROP REF"]], domna=row[header_indexes["Domna"]], - address=address, + address=row[header_indexes["ADDRESS"]], ownership=row[header_indexes["OWNERSHIP"]], prop_status=row[header_indexes["PROP STATUS"]], prop_type=row[header_indexes["PROP TYPE"]], @@ -62,6 +100,7 @@ class LbwfParser(Parser): element_comments=row[header_indexes["ELEMENT COMMENTS"]], ) + @staticmethod def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]: sheet: Workbook = wb["All Energy Breakdown "] diff --git a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py index 3955350b..dffd1e53 100644 --- a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py +++ b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py @@ -4,7 +4,6 @@ from datetime import date @dataclass class LbwfAssetCondition: - uprn: int prop_ref: int domna: int address: str diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index dfa1403e..78dbddad 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -5,6 
+5,7 @@ from openpyxl import Workbook from datetime import datetime from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse @pytest.fixture @@ -121,6 +122,13 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert # TODO: Improve these asserts assert len(result) == 2 + assert isinstance(result[0], LbwfHouse) assert result[0].uprn == 1 - assert result[1].uprn == 2 \ No newline at end of file + assert len(result[0].assets) == 1 + assert isinstance(result[0].assets[0], LbwfAssetCondition) + + assert isinstance(result[1], LbwfHouse) + assert result[1].uprn == 2 + assert len(result[1].assets) == 1 + assert isinstance(result[1].assets[0], LbwfAssetCondition) \ No newline at end of file From 01bf0dbd9c7aeb5f0b3ca3c4053d9bde0d9f6e4e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 09:58:20 +0000 Subject: [PATCH 200/202] =?UTF-8?q?Parse=20Houses=20sheet=20as=20well=20?= =?UTF-8?q?=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 58 ++++++++++++++++++------ 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 3c2fbf93..44ae05f7 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -14,36 +14,66 @@ logger = setup_logger class LbwfParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: - wb = load_workbook(file_stream) - address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb) + wb: Workbook = load_workbook(file_stream) + address_to_uprn_map: Dict[str, int] = self._generate_address_to_uprn_dict(wb) + + assets = self._parse_assets(wb) + houses = self._parse_houses(wb, address_to_uprn_map) + + 
self._merge_assets_into_houses(assets, houses) + + return houses + + @staticmethod + def _parse_assets(wb: Workbook) -> List[LbwfAssetCondition]: + assets_sheet = wb["Houses Asset Data"] + asset_rows = assets_sheet.iter_rows(values_only=True) - # Parse assets - assets_sheet: Workbook = wb["Houses Asset Data"] - asset_rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) asset_headers = next(asset_rows) - asset_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(asset_headers) + asset_header_indexes = LbwfParser._get_column_indexes_by_name(asset_headers) assets: List[LbwfAssetCondition] = [] for row in asset_rows: try: - assets.append(LbwfParser._map_row_to_asset_record(row, asset_header_indexes)) + assets.append( + LbwfParser._map_row_to_asset_record(row, asset_header_indexes) + ) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") - # Parse houses - houses_sheet: Workbook = wb["Houses"] - house_rows: Iterator[Tuple[object | None, ...]] = houses_sheet.iter_rows(values_only=True) + return assets + + @staticmethod + def _parse_houses( + wb: Workbook, + address_to_uprn_map: Dict[str, int], + ) -> List[LbwfHouse]: + houses_sheet = wb["Houses"] + house_rows = houses_sheet.iter_rows(values_only=True) + house_headers = next(house_rows) - house_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(house_headers) + house_header_indexes = LbwfParser._get_column_indexes_by_name(house_headers) houses: List[LbwfHouse] = [] for row in house_rows: try: - houses.append(LbwfParser._map_row_to_house_record(row, house_header_indexes, address_to_uprn_map)) + houses.append( + LbwfParser._map_row_to_house_record( + row, + house_header_indexes, + address_to_uprn_map, + ) + ) except Exception as e: logger.error(f"Error mapping LBWF row to house record: {e}") - # Merge assets and houses by Reference + return houses + + @staticmethod + def _merge_assets_into_houses( + assets: 
List[LbwfAssetCondition], + houses: List[LbwfHouse], + ) -> None: assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list) for asset in assets: assets_by_ref[asset.prop_ref].append(asset) @@ -51,8 +81,6 @@ class LbwfParser(Parser): for house in houses: house.assets = assets_by_ref.get(house.reference, []) - return houses - @staticmethod def _map_row_to_house_record( From 7846f9c949a64f133b7f6afa971ba65f37fba2f9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 10:34:09 +0000 Subject: [PATCH 201/202] Adjust EPC column name to correctly include trailing space --- backend/condition/parsing/lbwf_parser.py | 5 +++-- backend/condition/processor.py | 4 +++- backend/condition/tests/parsing/test_lbwf_parser.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 44ae05f7..8d52f6d5 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,6 +1,5 @@ from typing import BinaryIO, Any, Dict, Iterator, List, Tuple from openpyxl import Workbook, load_workbook -from datetime import date from collections import defaultdict from backend.condition.parsing.parser import Parser @@ -40,6 +39,7 @@ class LbwfParser(Parser): ) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") + continue return assets @@ -66,6 +66,7 @@ class LbwfParser(Parser): ) except Exception as e: logger.error(f"Error mapping LBWF row to house record: {e}") + continue return houses @@ -94,7 +95,7 @@ class LbwfParser(Parser): uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), reference=row[header_indexes["Reference"]], address=address, - epc=row[header_indexes["EPC"]], + epc=row[header_indexes["EPC "]], shdf=row[header_indexes["SHDF"]], house=row[header_indexes["HOSUE"]], fail_decency=row[header_indexes["Fail Decency"]], diff --git a/backend/condition/processor.py 
b/backend/condition/processor.py index 3939ba08..fb06c888 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -13,4 +13,6 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None: parser: Parser = select_parser(file_type) # Orchestration - records: List[Any] = parser.parse(file_stream) \ No newline at end of file + records: List[Any] = parser.parse(file_stream) + + print(records) # temp \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index 78dbddad..7556b845 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -79,7 +79,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: ]) houses = wb.create_sheet("Houses") - houses.append(["Reference", "Address", "EPC", "SHDF", "HOSUE", "Fail Decency"]) + houses.append(["Reference", "Address", "EPC ", "SHDF", "HOSUE", "Fail Decency"]) houses.append([12345, "123 Fake Street, London, A10 1AB", "E", "NO", "HOUSE", 2025]) houses.append([54321, "100 Random Road, London, A10 1AB", "F", "NO", "HOUSE", 2025]) From 07cab931e509ec5b3f6d0f3a2b6f44d45f336472 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 11:02:59 +0000 Subject: [PATCH 202/202] correction to gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 625277a5..6268360b 100644 --- a/.gitignore +++ b/.gitignore @@ -243,7 +243,7 @@ local_data/* /local_data/* etl/epc/local_data/* /backend/condition/sample_data/lbwf/* -/backend/condition/sample_data/peadody/* +/backend/condition/sample_data/peabody/* *.DS_Store infrastructure/terraform/.terraform*