From 615f2289e758c136e73dfaac88d0ff906785f03a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 12:39:46 +0000 Subject: [PATCH 001/248] Debugging list loading --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../ha_15_32/ha_analysis_batch_3.py | 81 +++++++------------ 3 files changed, 29 insertions(+), 56 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 92956337..7bb8b40c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -131,9 +131,17 @@ class DataLoader: return ciga_list + @staticmethod + def get_sheetname(workbook): + if "Asset List" in workbook.sheetnames: + return "Asset List" + else: + return "Assets" + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) - asset_sheet = workbook["Assets"] + sheetname = self.get_sheetname(workbook) + asset_sheet = workbook[sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] rows_data = [] @@ -170,8 +178,10 @@ class DataLoader: # Remove columns that are None survey_list = survey_list.loc[:, survey_list.columns.notnull()] survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))] + # Perform survey list merge - survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name) + if not survey_list.empty: + survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name) # We check if there are CIGA checks ciga_list = pd.DataFrame() @@ -185,9 +195,10 @@ class DataLoader: ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) # Remove columns that are None ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] - ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) # Perform ciga list merge - ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) + if not ciga_list.empty: + ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) + ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) return asset_list, survey_list, ciga_list @@ -208,6 +219,10 @@ class DataLoader: return asset_list + @staticmethod + def correct_ha39_asset_list(asset_list): + return asset_list + @staticmethod def correct_ha6_survey_list(survey_list): @@ -337,6 +352,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha39_survey_list(survey_list): + return survey_list + def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): # Correct the asset list @@ -491,23 +510,10 @@ class DataLoader: ha_name=ha_name, ) - if file_config.get("survey_list"): - # TODO: Delete this - logger.info("Loading survey list for {}".format(ha_name)) - survey_list, matched_lookup = self.load_survey_list( - asset_list=asset_list, - file_path=file_config["survey_list"]["filepath"], - ha_name=ha_name, - sheet_name=file_config["survey_list"]["sheetname"] - ) - else: - survey_list = None - matched_lookup = None - data[ha_name] = { "asset_list": asset_list, "survey_list": survey_list, - "matched_lookup": matched_lookup + "ciga_list": ciga_list } self.data = data @@ -1288,42 +1294,9 @@ def app(): # List all of the data in the folder directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] - files = { - "ha_1": { - "asset_list": { - "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx", - "sheetname": "Energy data" - } - }, - "ha_6": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx", - "sheetname": "HA 6" - }, - "survey_list": { - "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx", - "sheetname": "HA 6" - } - }, - "ha_14": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx", - "sheetname": "HA 14" - } - }, - "ha_39": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx", - "sheetname": "Sheet1" - } - }, - "ha_107": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx", - "sheetname": "HA 107" - } - } - } + priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] + # Filter down the directories to only the priority HAs + directories = [d for d in directories if d.split("/")[2] in priority_has] loader = DataLoader(directories, use_cache) loader.load() From a1b2f9bf5bdd2d059c6327612fe2cb83c5be1687 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 12:42:04 +0000 Subject: [PATCH 002/248] Added ciga list id --- .../ha_15_32/ha_analysis_batch_3.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7bb8b40c..fffc9daf 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -195,6 +195,7 @@ class DataLoader: ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) # Remove columns that are None ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] + survey_list["survey_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(survey_list))] # Perform ciga list merge if not ciga_list.empty: ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) @@ -440,14 +441,14 @@ class DataLoader: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())] if df.shape[0] != 1: postcode_lower = row["Post Code"].lower() - if postcode_lower in missed_postcodes: - matching_lookup.append( - { - "survey_list_row_id": row["survey_list_row_id"], - "asset_list_row_id": None, - } - ) - continue + # if postcode_lower in missed_postcodes: + # matching_lookup.append( + # { + # "survey_list_row_id": row["survey_list_row_id"], + # "asset_list_row_id": None, + # } + # ) + # continue print(row["Street / Block Name"]) print(house_number) @@ -456,13 +457,18 @@ class DataLoader: matching_lookup.append( { - "survey_list_row_id": row["survey_list_row_id"], + "ciga_list_row_id": row["ciga_list_row_id"], "asset_list_row_id": df["asset_list_row_id"].values[0], } ) matching_lookup = pd.DataFrame(matching_lookup) + # Merge onto the ciga list + ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id") + + return ciga_list + @staticmethod def identify_built_form_ha6(property_string): """ From d3bff08df8a4ce0d786acc10f9ab605abc938131 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 12:53:01 +0000 Subject: [PATCH 003/248] debugging survey matching for ha14 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index fffc9daf..d27bf8e8 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -221,7 +221,7 @@ class DataLoader: return asset_list @staticmethod - def correct_ha39_asset_list(asset_list): + def correct_ha14_asset_list(asset_list): return asset_list @staticmethod @@ -354,7 +354,15 @@ class DataLoader: return survey_list @staticmethod - def correct_ha39_survey_list(survey_list): + def correct_ha14_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Godfrey Road", "Godfrey Drive" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Oiliver Road", "Oliver Road" + ) + return survey_list def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): @@ -389,7 +397,7 @@ class DataLoader: if df.shape[0] != 1: df = df[df["HouseNo"] == str(house_number)] if df.shape[0] != 1: - df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())] + df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] if df.shape[0] != 1: postcode_lower = row["Post Code"].lower() if postcode_lower in missed_postcodes: From c6daf520467b0c994a67f7746b51450f36b6bea7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 16:00:23 +0000 Subject: [PATCH 004/248] Trying to handle streetname extraction and edge case in ciga matching --- .../ha_15_32/ha_analysis_batch_3.py | 192 +++++++++++++----- 1 file changed, 143 insertions(+), 49 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index d27bf8e8..cb4b9885 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1,4 +1,5 @@ import os +import re import openpyxl from pathlib import Path import msgpack @@ -36,6 +37,10 @@ class DataLoader: } } + UNMATCHED_CIGA = { + "HA14": 6 + } + def __init__(self, directories, use_cache): self.directories = directories self.use_cache = use_cache @@ -101,6 +106,9 @@ class DataLoader: else: split_addresses = asset_list['matching_address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) + # If we have "flat" or valley" as the house number, then the house number is actually in the second column + house_numbers[0] = np.where(house_numbers[0].isin(["flat", "valley"]), house_numbers[1], house_numbers[0]) + # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how # many columns there might be house_numbers = house_numbers.iloc[:, 0:1] @@ -117,7 +125,7 @@ class DataLoader: :return: """ - if ha_name in ["HA6"]: + if ha_name in ["HA6", "HA14"]: split_addresses = ciga_list['Matched Address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how @@ -132,16 +140,23 @@ class DataLoader: return ciga_list @staticmethod - def get_sheetname(workbook): + def get_asset_sheetname(workbook): if "Asset List" in workbook.sheetnames: return "Asset List" else: return "Assets" + @staticmethod + def get_ciga_sheetname(workbook): + if "CIGA Checks" in workbook.sheetnames: + return "CIGA Checks" + else: + return "CIGA" + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) - sheetname = self.get_sheetname(workbook) - asset_sheet = workbook[sheetname] + asset_sheetname = self.get_asset_sheetname(workbook) + asset_sheet = workbook[asset_sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] rows_data = [] @@ -165,41 +180,46 @@ class DataLoader: asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list) + # We correct the asset list if it needs it + # Correct the asset list + correction_function_name = f"correct_{ha_name.lower()}_asset_list" + if hasattr(self, correction_function_name): + asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list") + asset_list = asset_list_correction_function(asset_list) + # We check if there is a survey list - survey_list = pd.DataFrame() - if "ECO Surveys" in workbook.sheetnames: - survey_sheet = workbook["ECO Surveys"] - survey_rows = [] - for row in survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers - row_data = [cell.value for cell in row] # This will get you the cell values - survey_rows.append(row_data) + survey_sheetname = "ECO Surveys" + survey_sheet = workbook[survey_sheetname] + survey_rows = [] + for row in survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers + row_data = [cell.value for cell in row] # This will get you the cell values + survey_rows.append(row_data) - survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]]) - # Remove columns that are None - survey_list = survey_list.loc[:, survey_list.columns.notnull()] - survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))] + survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]]) + # Remove columns that are None + survey_list = survey_list.loc[:, survey_list.columns.notnull()] + survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))] - # Perform survey list merge - if not survey_list.empty: - survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name) + # Perform survey list merge + if not survey_list.empty: + survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name) # We check if there are CIGA checks - ciga_list = pd.DataFrame() - if "CIGA Checks" in workbook.sheetnames: - ciga_sheet = workbook["CIGA Checks"] - ciga_rows = [] - for row in ciga_sheet.iter_rows(min_row=2, values_only=False): - row_data = [cell.value for cell in row] # This will get you the cell values - ciga_rows.append(row_data) + ciga_sheetname = self.get_ciga_sheetname(workbook) + ciga_sheet = workbook[ciga_sheetname] + ciga_rows = [] + for row in ciga_sheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + ciga_rows.append(row_data) - ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) - # Remove columns that are None - ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] - survey_list["survey_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(survey_list))] - # Perform ciga list merge - if not ciga_list.empty: - ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) - ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) + ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) + # Remove columns that are None + ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] + ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))] + # Perform ciga list merge + if not ciga_list.empty: + ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) + ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) return asset_list, survey_list, ciga_list @@ -222,6 +242,21 @@ class DataLoader: @staticmethod def correct_ha14_asset_list(asset_list): + + # For 5 Queens Court, DE72 3NP, the postcode is actually DE72 3QZ + asset_list.loc[ + (asset_list["Address 1"] == "5 Queens Court") & + (asset_list["Postcode"].str.strip() == "DE72 3NP"), + "matching_postcode" + ] = "DE72 3QZ" + + # We then correct the matching_address + asset_list.loc[ + (asset_list["Address 1"] == "5 Queens Court") & + (asset_list["Postcode"].str.strip() == "DE72 3NP"), + "matching_address" + ] = "5 queens court, garfield avenue, draycott, derby, de72 3qz" + return asset_list @staticmethod @@ -363,13 +398,22 @@ class DataLoader: "Oiliver Road", "Oliver Road" ) + # For postodes DE7 4FB, DE7 4EZ, it's actually spelled WINDERMERE AVENUE, not WINDEREMERE AVENUE (without the + # extra e) + survey_list.loc[ + (survey_list["Street / Block Name"] == "WINDEREMERE AVENUE") & + (survey_list["Post Code"].isin(["DE7 4FB", "DE7 4EZ"])), + "Street / Block Name" + ] = "WINDERMERE AVENUE" + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "MACDONALD SQAURE", "MACDONALD SQUARE" + ) + return survey_list def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): - # Correct the asset list - asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list") - asset_list = asset_list_correction_function(asset_list) # Correct the survey list survey_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_survey_list") survey_list = survey_list_correction_function(survey_list) @@ -411,7 +455,7 @@ class DataLoader: print(row["Street / Block Name"]) print(house_number) - print(row["Post Code"].lower()) + print(row["Post Code"]) raise ValueError("Investigate") matching_lookup.append( @@ -428,8 +472,38 @@ class DataLoader: return survey_list + @staticmethod + def extract_streetname(address, house_number=None, postcode=None): + """ + Cleans an address by removing the house number and postcode, and converts everything to lower case. + + :param address: The full address as a string. + :param house_number: The house number to remove, as a string or integer. + :param postcode: The postcode to remove, as a string. + :return: The cleaned address. + """ + # Convert everything to lower case + address = address.lower() + + if house_number is not None: + # Remove the house number + address = re.sub(r'\b{}\b'.format(house_number), '', address, flags=re.IGNORECASE).strip() + + if postcode is not None: + # Remove the postcode + address = re.sub(r'\b{}\b'.format(re.escape(postcode)), '', address, flags=re.IGNORECASE).strip() + + # Get first section before a comma + address = address.split(",")[0] + # Additional cleaning to remove extra spaces and commas left over + address = re.sub(r'\s+', ' ', address) # Replace multiple spaces with a single space + address = re.sub(r'\s*,\s*', ', ', address) # Clean up space around commas + + return address + def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name): matching_lookup = [] + unmatched_addresses = [] for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)): house_number = row["HouseNo"] @@ -442,22 +516,35 @@ class DataLoader: ].copy() df = df[df["HouseNo"] == str(house_number)] + # For ciga, we skip + if df.empty: + if row["Matched Postcode"] == "LE3 3EE": + dew + unmatched_addresses.append( + { + "ciga_list_row_id": row["ciga_list_row_id"], + "HouseNo": house_number, + "Matched Postcode": row["Matched Postcode"] + } + ) + continue # TODO: Might need to consider street name at some point if df.shape[0] != 1: - if df.shape[0] != 1: - df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())] - if df.shape[0] != 1: - postcode_lower = row["Post Code"].lower() - # if postcode_lower in missed_postcodes: - # matching_lookup.append( - # { - # "survey_list_row_id": row["survey_list_row_id"], - # "asset_list_row_id": None, - # } - # ) - # continue + # We split house number and postcode out of the matched address for ciga + street_name = self.extract_streetname( + address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"] + ) + df = df[df["matching_address"].str.contains(street_name)] + if df.shape[0] != 1: + # The final check we do here is to check for the presence of flat in the address + if "flat" in row["Matched Address"]: + df = df[df["matching_address"].str.contains("flat")] + else: + df = df[df["matching_address"].str.contains("flat") == False] + + if df.shape[0] != 1: print(row["Street / Block Name"]) print(house_number) print(row["Post Code"].lower()) @@ -470,6 +557,13 @@ class DataLoader: } ) + # We have an acceptable number of ciga failures for each HA + if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]: + raise ValueError(f"Unmatched addresses for {ha_name} is not as expected") + + # In ciga: 35 Valley Drive, Leicester, LE3 3EE + # + matching_lookup = pd.DataFrame(matching_lookup) # Merge onto the ciga list From 75102704cdfeacaac68194c9646e23f208e48baf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 16:05:31 +0000 Subject: [PATCH 005/248] ciga matching for ha14 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index cb4b9885..1a28500b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -38,7 +38,9 @@ class DataLoader: } UNMATCHED_CIGA = { - "HA14": 6 + # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not + # the asset list + "HA14": 4 } def __init__(self, directories, use_cache): @@ -518,8 +520,6 @@ class DataLoader: df = df[df["HouseNo"] == str(house_number)] # For ciga, we skip if df.empty: - if row["Matched Postcode"] == "LE3 3EE": - dew unmatched_addresses.append( { "ciga_list_row_id": row["ciga_list_row_id"], @@ -528,18 +528,18 @@ class DataLoader: } ) continue - # TODO: Might need to consider street name at some point + if df.shape[0] != 1: # We split house number and postcode out of the matched address for ciga street_name = self.extract_streetname( address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"] ) - df = df[df["matching_address"].str.contains(street_name)] + df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)] if df.shape[0] != 1: # The final check we do here is to check for the presence of flat in the address - if "flat" in row["Matched Address"]: + if "flat" in row["Matched Address"].lower(): df = df[df["matching_address"].str.contains("flat")] else: df = df[df["matching_address"].str.contains("flat") == False] From 32352bbde145c6a0c76f503c766e7fca80c2af99 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 17:46:11 +0000 Subject: [PATCH 006/248] working on survey match for ha107 --- .../ha_15_32/ha_analysis_batch_3.py | 45 +++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1a28500b..9e850c0e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -40,7 +40,9 @@ class DataLoader: UNMATCHED_CIGA = { # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not # the asset list - "HA14": 4 + "HA14": 4, + # There's just too many unmatched here - if we identify some homes that + "HA6": 117 } def __init__(self, directories, use_cache): @@ -78,11 +80,11 @@ class DataLoader: elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["Street"].str.lower().str.strip() + ", " + \ - asset_list["Town"].str.lower().str.strip() + ", " + \ - asset_list["District"].str.lower().str.strip() + ", " + \ - asset_list["Postcode"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() + asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["District"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() else: raise NotImplementedError("implement me") @@ -155,6 +157,13 @@ class DataLoader: else: return "CIGA" + @staticmethod + def get_survey_sheetname(workbook): + if "ECO Surveys" in workbook.sheetnames: + return "ECO Surveys" + else: + return "ECO surveys" + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) asset_sheetname = self.get_asset_sheetname(workbook) @@ -189,8 +198,13 @@ class DataLoader: asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list") asset_list = asset_list_correction_function(asset_list) + # For HA1, there is an exception in the structure of the data. We don't have any survey or ciga lists, and so + # we can return the asset list now + if ha_name == "HA1": + return asset_list, pd.DataFrame(), pd.DataFrame() + # We check if there is a survey list - survey_sheetname = "ECO Surveys" + survey_sheetname = self.get_survey_sheetname(workbook) survey_sheet = workbook[survey_sheetname] survey_rows = [] for row in survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers @@ -217,6 +231,9 @@ class DataLoader: ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) # Remove columns that are None ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] + # Remove rows with missing postcode which happens in a small number of cases + ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])] + ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))] # Perform ciga list merge if not ciga_list.empty: @@ -414,6 +431,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha107_survey_list(survey_list): + return survey_list + def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): # Correct the survey list @@ -441,7 +462,7 @@ class DataLoader: df = df[df["matching_address"].str.contains(str(house_number))] if df.shape[0] != 1: - df = df[df["HouseNo"] == str(house_number)] + df = df[df["HouseNo"].astype(str) == str(house_number)] if df.shape[0] != 1: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] if df.shape[0] != 1: @@ -506,6 +527,7 @@ class DataLoader: def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name): matching_lookup = [] unmatched_addresses = [] + for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)): house_number = row["HouseNo"] @@ -528,7 +550,7 @@ class DataLoader: } ) continue - + if df.shape[0] != 1: # We split house number and postcode out of the matched address for ciga @@ -561,9 +583,6 @@ class DataLoader: if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]: raise ValueError(f"Unmatched addresses for {ha_name} is not as expected") - # In ciga: 35 Valley Drive, Leicester, LE3 3EE - # - matching_lookup = pd.DataFrame(matching_lookup) # Merge onto the ciga list @@ -612,7 +631,7 @@ class DataLoader: for filepath in self.directories: ha_name = filepath.split("/")[2] # Load asset list - logger.info("Loading asset list for {}".format(ha_name)) + logger.info("Loading data for {}".format(ha_name)) asset_list, survey_list, ciga_list = self.load_asset_list( filepath=filepath, ha_name=ha_name, From d038d668b8fa8360577ef0f83403e3d4cb6e854e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 17:52:20 +0000 Subject: [PATCH 007/248] ha107 matching 73% complete --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 9e850c0e..46581eca 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -433,6 +433,16 @@ class DataLoader: @staticmethod def correct_ha107_survey_list(survey_list): + # Replace Front Street, East Stockham with Front Street, East Stockwith + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Front Street, East Stockham", "Front Street, East Stockwith" + ) + + # Replace "HONEYHOLE L;ANE" with "HONEYHOLES LANE" + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "HONEYHOLE L;ANE", "HONEYHOLES LANE" + ) + return survey_list def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): From ccb764d4a968efeaef67a068f1cc21f92dfe7000 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Feb 2024 18:01:24 +0000 Subject: [PATCH 008/248] ha107 matching 74% done --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 46581eca..60ef485a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -443,6 +443,16 @@ class DataLoader: "HONEYHOLE L;ANE", "HONEYHOLES LANE" ) + # Replace "Croft Lane Cherry Willingham, Lincoln" with "Croft Lane, Cherry Willingham, Lincoln" + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Croft Lane Cherry Willingham, Lincoln", "Croft Lane, Cherry Willingham, Lincoln" + ) + + # Replace "Snelland Road Wickenby, Lincoln" with "Snelland Road, Wickenby, Lincoln" + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln" + ) + return survey_list def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): From cef20c6e2cf97275146f36f97349f4d0a46d2410 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 Feb 2024 12:08:44 +0000 Subject: [PATCH 009/248] completed matching for ha107, added levenstein method --- .../ha_15_32/ha_analysis_batch_3.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 60ef485a..bf3e6d31 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1,6 +1,7 @@ import os import re import openpyxl +import Levenshtein from pathlib import Path import msgpack from datetime import datetime @@ -453,6 +454,41 @@ class DataLoader: "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln" ) + # Replace Reasby Road Snelland, Lincoln with Reasby Road, Snelland, Lincoln + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Reasby Road Snelland, Lincoln", "Reasby Road, Snelland, Lincoln" + ) + + # Replace Silver Street Bardney, Lincoln with Silver Street, Bardney, Lincoln + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Silver Street Bardney, Lincoln", "Silver Street, Bardney, Lincoln" + ) + + # Replace Manor Close Bardney, Lincoln with Manor Close, Bardney, Lincoln + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Manor Close Bardney, Lincoln", "Manor Close, Bardney, Lincoln" + ) + + # Replace Ferry Road Southrey, Lincoln with Ferry Road, Southrey, Lincoln + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Ferry Road Southrey, Lincoln", "Ferry Road, Southrey, Lincoln" + ) + + # Replace Harvey Kent Gardens Bardney, Lincoln with Harvey Kent Gardens, Bardney, Lincoln + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Harvey Kent Gardens Bardney, Lincoln", "Harvey Kent Gardens, Bardney, Lincoln" + ) + + # Replace Wragby Road Bardney, Lincoln with Wragby Road, Bardney, Lincoln + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Wragby Road Bardney, Lincoln", "Wragby Road, Bardney, Lincoln" + ) + + # Replace SPRINKHILL ROAD with SPINKHILL ROAD + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "SPRINKHILL ROAD", "SPINKHILL ROAD" + ) + return survey_list def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): @@ -481,10 +517,35 @@ class DataLoader: ].copy() df = df[df["matching_address"].str.contains(str(house_number))] + + if df.empty: + print(row["Street / Block Name"]) + print(house_number) + print(row["Post Code"]) + raise ValueError("Investigate") + if df.shape[0] != 1: df = df[df["HouseNo"].astype(str) == str(house_number)] if df.shape[0] != 1: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] + + full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + row[ + "Town/Area"].lower().strip() + row["Post Code"].lower().strip() + # Remove any spaces from the full key + full_key = full_key.replace(" ", "") + + match_to = df["matching_address"].tolist() + # Strip out punctuation and spaces + match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to] + match_to = [x.replace(" ", "") for x in match_to] + + # Perform matching between full key and match_to + distances = [Levenshtein.distance(full_key, s) for s in match_to] + best_match_index = distances.index(min(distances)) + # We might want to consider a threshold for the distance, however for the momeny, + # we don't consider this for the moment + df = df.iloc[best_match_index:best_match_index + 1] + if df.shape[0] != 1: postcode_lower = row["Post Code"].lower() if postcode_lower in missed_postcodes: @@ -510,6 +571,9 @@ class DataLoader: matching_lookup = pd.DataFrame(matching_lookup) + if matching_lookup.shape[0] != survey_list.shape[0]: + raise ValueError("Mismatch in the number of survey rows and matching lookup rows") + # Merge onto the survey list survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id") From bc0a2b8e37eab7dcfc4130b18b5c3ebe1c0953cc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 Feb 2024 12:11:00 +0000 Subject: [PATCH 010/248] debygging location of dropping nulls from ciga list --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bf3e6d31..f1709d6e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -232,12 +232,11 @@ class DataLoader: ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) # Remove columns that are None ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] - # Remove rows with missing postcode which happens in a small number of cases - ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])] - - ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))] # Perform ciga list merge if not ciga_list.empty: + # Remove rows with missing postcode which happens in a small number of cases + ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])] + ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))] ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) From 5a451f2f8239aaac05237c93b99c435de83a8652 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 Feb 2024 12:20:46 +0000 Subject: [PATCH 011/248] fixed logic for missed postcodes for ha6 --- .../ha_15_32/ha_analysis_batch_3.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index f1709d6e..95ca3901 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -518,6 +518,17 @@ class DataLoader: df = df[df["matching_address"].str.contains(str(house_number))] if df.empty: + + postcode_lower = row["Post Code"].lower() + if postcode_lower in missed_postcodes: + matching_lookup.append( + { + "survey_list_row_id": row["survey_list_row_id"], + "asset_list_row_id": None, + } + ) + continue + print(row["Street / Block Name"]) print(house_number) print(row["Post Code"]) @@ -546,16 +557,6 @@ class DataLoader: df = df.iloc[best_match_index:best_match_index + 1] if df.shape[0] != 1: - postcode_lower = row["Post Code"].lower() - if postcode_lower in missed_postcodes: - matching_lookup.append( - { - "survey_list_row_id": row["survey_list_row_id"], - "asset_list_row_id": None, - } - ) - continue - print(row["Street / Block Name"]) print(house_number) print(row["Post Code"]) From 75183902c193a8c5634b8cbc9c7bf045dd5a0898 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 Feb 2024 15:54:28 +0000 Subject: [PATCH 012/248] completed creationg of matching tables --- .../ha_15_32/ha_analysis_batch_3.py | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 95ca3901..2d95a946 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -43,7 +43,8 @@ class DataLoader: # the asset list "HA14": 4, # There's just too many unmatched here - if we identify some homes that - "HA6": 117 + "HA6": 117, + "HA107": 52 } def __init__(self, directories, use_cache): @@ -130,7 +131,7 @@ class DataLoader: :return: """ - if ha_name in ["HA6", "HA14"]: + if ha_name in ["HA6", "HA14", "HA107"]: split_addresses = ciga_list['Matched Address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how @@ -153,8 +154,11 @@ class DataLoader: @staticmethod def get_ciga_sheetname(workbook): + if "CIGA Checks" in workbook.sheetnames: return "CIGA Checks" + elif "CIGA checks" in workbook.sheetnames: + return "CIGA checks" else: return "CIGA" @@ -490,6 +494,22 @@ class DataLoader: return survey_list + @staticmethod + def levenstein_match(matching_string, df): + match_to = df["matching_address"].tolist() + # Strip out punctuation and spaces + match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to] + match_to = [x.replace(" ", "") for x in match_to] + + # Perform matching between full key and match_to + distances = [Levenshtein.distance(matching_string, s) for s in match_to] + best_match_index = distances.index(min(distances)) + # We might want to consider a threshold for the distance, however for the momeny, + # we don't consider this for the moment + df = df.iloc[best_match_index:best_match_index + 1] + + return df + def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): # Correct the survey list @@ -544,17 +564,7 @@ class DataLoader: # Remove any spaces from the full key full_key = full_key.replace(" ", "") - match_to = df["matching_address"].tolist() - # Strip out punctuation and spaces - match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to] - match_to = [x.replace(" ", "") for x in match_to] - - # Perform matching between full key and match_to - distances = [Levenshtein.distance(full_key, s) for s in match_to] - best_match_index = distances.index(min(distances)) - # We might want to consider a threshold for the distance, however for the momeny, - # we don't consider this for the moment - df = df.iloc[best_match_index:best_match_index + 1] + df = self.levenstein_match(full_key, df) if df.shape[0] != 1: print(row["Street / Block Name"]) @@ -623,7 +633,7 @@ class DataLoader: asset_list["matching_address"].str.contains(row["Matched Postcode"].lower().strip()) ].copy() - df = df[df["HouseNo"] == str(house_number)] + df = df[df["HouseNo"].astype(str) == str(house_number)] # For ciga, we skip if df.empty: unmatched_addresses.append( @@ -641,7 +651,9 @@ class DataLoader: street_name = self.extract_streetname( address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"] ) - df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)] + # We check if any of the rows contains the street name and if they do, filter + if any(df["matching_address"].str.replace(",", "").str.contains(street_name)): + df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)] if df.shape[0] != 1: # The final check we do here is to check for the presence of flat in the address @@ -650,6 +662,13 @@ class DataLoader: else: df = df[df["matching_address"].str.contains("flat") == False] + if df.shape[0] != 1: + full_key = str(row["HouseNo"]).lower().strip() + row["Matched Address"].lower().strip() + row[ + "Matched Postcode"].lower().strip() + # Remove any spaces from the full key + full_key = full_key.replace(" ", "") + df = self.levenstein_match(full_key, df) + if df.shape[0] != 1: print(row["Street / Block Name"]) print(house_number) @@ -737,6 +756,19 @@ class DataLoader: s3_file_name="ha-analysis/batch3-inputs.pickle", ) + def ha_facts_and_figures(self): + """ + This function will return a dictionary of facts and figures for each HA + :return: + """ + ha_facts_and_figures = [] + for ha_name, data_assets in self.data.items(): + asset_list = data_assets["asset_list"] + survey_list = data_assets["survey_list"] + ciga_list = data_assets["ciga_list"] + + return ha_facts_and_figures + def get_epc_data( loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=True @@ -1511,6 +1543,7 @@ def app(): loader = DataLoader(directories, use_cache) loader.load() + loader.ha_facts_and_figures() # TODO: We probably need to make sure that we have all of the columns that we need From 6693ab4ca6e12a6b9da112e8c8a3d48b1fe6ad87 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 Feb 2024 17:13:18 +0000 Subject: [PATCH 013/248] Added in read of december figures --- .../ha_15_32/ha_analysis_batch_3.py | 55 +++++++++++++++++-- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 2d95a946..dbe12e92 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -47,11 +47,13 @@ class DataLoader: "HA107": 52 } - def __init__(self, directories, use_cache): + def __init__(self, directories, december_figures_filepath, use_cache): self.directories = directories self.use_cache = use_cache + self.december_figures_filepath = december_figures_filepath self.data = {} + self.december_figures = None def create_asset_list_matching_address(self, ha_name, asset_list): @@ -730,6 +732,11 @@ class DataLoader: ) return + # Get the december figures, which is just a csv + self.december_figures = pd.read_csv(self.december_figures_filepath) + # Remove the spaces in HA Name + self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "") + data = {} for filepath in self.directories: ha_name = filepath.split("/")[2] @@ -763,9 +770,43 @@ class DataLoader: """ ha_facts_and_figures = [] for ha_name, data_assets in self.data.items(): - asset_list = data_assets["asset_list"] - survey_list = data_assets["survey_list"] - ciga_list = data_assets["ciga_list"] + asset_list = data_assets["asset_list"].copy() + survey_list = data_assets["survey_list"].copy() + ciga_list = data_assets["ciga_list"].copy() + + asset_list["ECO Eligibility"].value_counts() + + # We merge on ciga and update the status to reflect if it has failed ciga or not + # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA + # check + asset_list = asset_list.merge( + ciga_list[["asset_list_row_id", "Guarantee"]], + how='left', + on="asset_list_row_id" + ) + + asset_list["ECO Eligibility"].value_counts() + + asset_list["ECO Eligibility"] = np.where( + ( + asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) & + (asset_list["Guarantee"] == "Yes") + ), + "Failed CIGA", + asset_list["ECO Eligibility"] + ) + + # We replace any remaining "Subject to CIGA" with pass Ciga + asset_list["ECO Eligibility"] = np.where( + asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False), + "Pass CIGA", + asset_list["ECO Eligibility"] + ) + + asset_list = asset_list.drop(columns=["Guarantee"]) + + # Update the asset list with the categorisations + self.data[ha_name]["asset_list"] = asset_list return ha_facts_and_figures @@ -1532,16 +1573,18 @@ def app(): :return: """ - use_cache = False + use_cache = True # List all of the data in the folder directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] + # Grab the December HA figures filepath + december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] - loader = DataLoader(directories, use_cache) + loader = DataLoader(directories, december_figures_filepath, use_cache) loader.load() loader.ha_facts_and_figures() From 8b48dbac9e5e9f25e3c738c1322b1f3a9fbb11db Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 13:37:50 +0000 Subject: [PATCH 014/248] working on eco eligibility code --- .../ha_15_32/ha_analysis_batch_3.py | 153 ++++++++++++++---- 1 file changed, 122 insertions(+), 31 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index dbe12e92..fdc00876 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -725,6 +725,13 @@ class DataLoader: def load(self): + # Get the december figures, which is just a csv + self.december_figures = pd.read_csv(self.december_figures_filepath) + # Remove the spaces in HA Name + self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "") + self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64") + self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64") + if self.use_cache: self.data = read_pickle_from_s3( bucket_name="retrofit-datalake-dev", @@ -732,11 +739,6 @@ class DataLoader: ) return - # Get the december figures, which is just a csv - self.december_figures = pd.read_csv(self.december_figures_filepath) - # Remove the spaces in HA Name - self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "") - data = {} for filepath in self.directories: ha_name = filepath.split("/")[2] @@ -768,46 +770,135 @@ class DataLoader: This function will return a dictionary of facts and figures for each HA :return: """ + + scheme_map = { + "ECO4": "ECO4", + "AFFORDABLE WARMTH": "ECO4", + } + + eco_eligibility_map = { + "not eligble": "not eligible" + } + ha_facts_and_figures = [] for ha_name, data_assets in self.data.items(): asset_list = data_assets["asset_list"].copy() survey_list = data_assets["survey_list"].copy() ciga_list = data_assets["ciga_list"].copy() - asset_list["ECO Eligibility"].value_counts() + # Change the column name if it's ECO eligibility + asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"}) + # Remove surplus whitespace from the ECO Eligibility column + asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip() + # Push to lower case + asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower() + # Remap + asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map) - # We merge on ciga and update the status to reflect if it has failed ciga or not - # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA - # check - asset_list = asset_list.merge( - ciga_list[["asset_list_row_id", "Guarantee"]], - how='left', - on="asset_list_row_id" - ) + if not ciga_list.empty: + # We merge on ciga and update the status to reflect if it has failed ciga or not + # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA + # check + asset_list = asset_list.merge( + ciga_list[["asset_list_row_id", "Guarantee"]], + how='left', + on="asset_list_row_id" + ) - asset_list["ECO Eligibility"].value_counts() + asset_list["ECO Eligibility"].value_counts() - asset_list["ECO Eligibility"] = np.where( - ( - asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) & - (asset_list["Guarantee"] == "Yes") - ), - "Failed CIGA", - asset_list["ECO Eligibility"] - ) + asset_list["ECO Eligibility"] = np.where( + ( + asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) & + (asset_list["Guarantee"] == "Yes") + ), + "failed ciga", + asset_list["ECO Eligibility"] + ) - # We replace any remaining "Subject to CIGA" with pass Ciga - asset_list["ECO Eligibility"] = np.where( - asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False), - "Pass CIGA", - asset_list["ECO Eligibility"] - ) + # We replace any remaining "Subject to CIGA" with pass Ciga + asset_list["ECO Eligibility"] = np.where( + asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False), + "eco4 - passed ciga", + asset_list["ECO Eligibility"] + ) - asset_list = asset_list.drop(columns=["Guarantee"]) + asset_list = asset_list.drop(columns=["Guarantee"]) - # Update the asset list with the categorisations + # Update the asset list with the categorisations and rename changes self.data[ha_name]["asset_list"] = asset_list + # Report on sales + sales_report = {} + if not survey_list.empty: + scheme_column = survey_list.columns[0] + # We clean up the survey list installation or cancelled + survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower() + # Remove all punctuation + survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( + r'[^\w\s]', '', regex=True + ) + # Remove double spaces + survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( + r'\s+', ' ', regex=True + ) + # Remove trailing spaces + survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() + + # Remap the values in the scheme column + survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map) + + survey_list["installation_status"] = None + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]), + "installed", + survey_list["installation_status"] + ) + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["cancelled"]), + "cancelled", + survey_list["installation_status"] + ) + # Find partial installations + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), + "partially installed", + survey_list["installation_status"] + ) + # Find partial cancellations + # TODO: We might have more indications of partial cancellations + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), + "partially cancelled", + survey_list["installation_status"] + ) + + # Finally, for other cases, we set the status to "in progress" + survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress") + + # We concatenate the scheme name with the installation status + survey_list["installation_status"] = ( + survey_list[scheme_column] + " - " + survey_list["installation_status"] + ) + + # We get the sales + sales_report = survey_list["installation_status"].value_counts().to_dict() + + ha_facts_and_figures.append( + { + "HA Name": ha_name, + **asset_list["ECO Eligibility"].value_counts().to_dict(), + **sales_report + } + ) + + ha_facts_and_figures = pd.DataFrame(ha_facts_and_figures) + ha_facts_and_figures = ha_facts_and_figures.drop( + columns=["not eligible"] + ) + + ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name") + return ha_facts_and_figures From ae2cc3fab57687bdc83d4aef4d60c23bd3a3b5e8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 14:14:19 +0000 Subject: [PATCH 015/248] working on ha facts and figures --- .../ha_15_32/ha_analysis_batch_3.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index fdc00876..d75a9f34 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -42,7 +42,7 @@ class DataLoader: # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not # the asset list "HA14": 4, - # There's just too many unmatched here - if we identify some homes that + # There's just too many unmatched here "HA6": 117, "HA107": 52 } @@ -786,6 +786,8 @@ class DataLoader: survey_list = data_assets["survey_list"].copy() ciga_list = data_assets["ciga_list"].copy() + asset_list_starting_size = asset_list.shape[0] + # Change the column name if it's ECO eligibility asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"}) # Remove surplus whitespace from the ECO Eligibility column @@ -793,19 +795,17 @@ class DataLoader: # Push to lower case asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower() # Remap - asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map) + asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].replace(eco_eligibility_map) if not ciga_list.empty: # We merge on ciga and update the status to reflect if it has failed ciga or not # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA # check - asset_list = asset_list.merge( - ciga_list[["asset_list_row_id", "Guarantee"]], - how='left', - on="asset_list_row_id" - ) - asset_list["ECO Eligibility"].value_counts() + ciga_list_to_merge = ciga_list[["asset_list_row_id", "Guarantee"]].copy() + ciga_list_to_merge = ciga_list_to_merge[~pd.isnull(ciga_list_to_merge["asset_list_row_id"])] + + asset_list = asset_list.merge(ciga_list_to_merge, how='left', on="asset_list_row_id") asset_list["ECO Eligibility"] = np.where( ( @@ -818,7 +818,10 @@ class DataLoader: # We replace any remaining "Subject to CIGA" with pass Ciga asset_list["ECO Eligibility"] = np.where( - asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False), + ( + asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) & + (asset_list["Guarantee"] == "No") + ), "eco4 - passed ciga", asset_list["ECO Eligibility"] ) @@ -826,6 +829,8 @@ class DataLoader: asset_list = asset_list.drop(columns=["Guarantee"]) # Update the asset list with the categorisations and rename changes + if asset_list.shape[0] != asset_list_starting_size: + raise ValueError("The asset list has changed in size") self.data[ha_name]["asset_list"] = asset_list # Report on sales @@ -846,7 +851,7 @@ class DataLoader: survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() # Remap the values in the scheme column - survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map) + survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map) survey_list["installation_status"] = None survey_list["installation_status"] = np.where( From 8ef0198606486cf3eee9abf84723181ef221ea6b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 16:22:50 +0000 Subject: [PATCH 016/248] handling deduping ciga match --- .../ha_15_32/ha_analysis_batch_3.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index d75a9f34..6ffe50e3 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -41,7 +41,7 @@ class DataLoader: UNMATCHED_CIGA = { # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not # the asset list - "HA14": 4, + "HA14": 3, # There's just too many unmatched here "HA6": 117, "HA107": 52 @@ -147,6 +147,17 @@ class DataLoader: return ciga_list + @staticmethod + def dedupe_ciga_list(ciga_list): + ciga_list["unique_key"] = ciga_list["Matched Address"] + ciga_list["Matched Postcode"] + # Remove spaces from the unique key + ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(" ", "") + # Remove punctuation from the unique key + ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(r'[^\w\s]', '') + # Drop duplicated keys + ciga_list = ciga_list[~ciga_list["unique_key"].duplicated()] + return ciga_list + @staticmethod def get_asset_sheetname(workbook): if "Asset List" in workbook.sheetnames: @@ -244,6 +255,7 @@ class DataLoader: ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])] ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))] ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) + ciga_list = self.dedupe_ciga_list(ciga_list) ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) return asset_list, survey_list, ciga_list @@ -686,10 +698,15 @@ class DataLoader: # We have an acceptable number of ciga failures for each HA if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]: - raise ValueError(f"Unmatched addresses for {ha_name} is not as expected") + raise ValueError( + f"Unmatched addresses for {ha_name} is not as expected, got {len(unmatched_addresses)} unmatched") matching_lookup = pd.DataFrame(matching_lookup) + # Check dupes as this will cause problems later on + if matching_lookup["asset_list_row_id"].duplicated().any(): + raise ValueError("Duplicated asset list row ids") + # Merge onto the ciga list ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id") From 78f5226ad7a5ec81e4da1ca6f9e78565146e0457 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 16:38:14 +0000 Subject: [PATCH 017/248] put together ha facts and figures --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 6ffe50e3..bd4d5128 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -44,7 +44,7 @@ class DataLoader: "HA14": 3, # There's just too many unmatched here "HA6": 117, - "HA107": 52 + "HA107": 51 } def __init__(self, directories, december_figures_filepath, use_cache): @@ -54,6 +54,7 @@ class DataLoader: self.data = {} self.december_figures = None + self.ha_facts_and_figures = None def create_asset_list_matching_address(self, ha_name, asset_list): @@ -794,7 +795,8 @@ class DataLoader: } eco_eligibility_map = { - "not eligble": "not eligible" + "not eligble": "not eligible", + "eco 4(subject to ciga)": "eco4 (subject to ciga)", } ha_facts_and_figures = [] @@ -919,9 +921,15 @@ class DataLoader: columns=["not eligible"] ) - ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name") + ha_facts_and_figures = ha_facts_and_figures.fillna(0) + # Make all columns apart from HA NAme integers + for col in ha_facts_and_figures.columns[1:]: + ha_facts_and_figures[col] = ha_facts_and_figures[col].astype(int) - return ha_facts_and_figures + ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name") + ha_facts_and_figures = ha_facts_and_figures.fillna(0) + + self.ha_facts_and_figures = ha_facts_and_figures def get_epc_data( From c18740eebda1a2b307a91e215f78fdeafcad8402 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 18:44:11 +0000 Subject: [PATCH 018/248] updating eligibility detection --- etl/eligibility/Eligibility.py | 57 +-- .../ha_15_32/ha_analysis_batch_3.py | 402 ++++++++++-------- 2 files changed, 249 insertions(+), 210 deletions(-) diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index 906ff594..b09d2df5 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -340,7 +340,6 @@ class Eligibility: # Check if the property is suitable for cavity wall self.cavity_insulation() - self.loft_insulation() self.gbis_warmfront = (self.cavity["suitability"]) and ( int(self.epc["current-energy-efficiency"]) <= 68 @@ -384,43 +383,49 @@ class Eligibility: if current_sap >= 69: self.eco4_warmfront = { "eligible": False, - "message": "sap too high", + "message": "SAP too high", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - if post_retrofit_sap is None: - - if current_sap >= 55: - message = "Possibly eligible but property currently EPC D" - else: - message = "subject to post retrofit sap" if is_eligible else "not eligible" - - # Update the message to flag properties that failed just because of a full cavity. - # We need to double check that the wall is a cavity, that the loft is suitable and that the - # sap is within reason - # We can then estimate the age of the cavity fill - if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]: - message = "Failed due to full cavity - check cavity age" - + if not is_eligible and current_sap >= 55: self.eco4_warmfront = { - "eligible": is_eligible, - "message": message, + "eligible": False, + "message": "failed fabric and SAP check", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - is_eligible = is_eligible & (post_retrofit_sap >= 69) + if not is_eligible and current_sap < 55: + self.eco4_warmfront = { + "eligible": False, + "message": "failed fabric check", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return - self.eco4_warmfront = { - "eligible": is_eligible, - "message": None, - "cavity_type": self.cavity["type"], - "loft_type": self.loft["thickness_classification"] - } - return + if is_eligible and current_sap >= 55: + self.eco4_warmfront = { + "eligible": True, + "message": "Meets fabric, fails SAP check", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + if is_eligible and current_sap < 55: + self.eco4_warmfront = { + "eligible": True, + "message": "Meets fabric and SAP check", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + raise ValueError("Implement me") def check_gbis(self): diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bd4d5128..5dd9b6e1 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -25,6 +25,84 @@ DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data" logger = setup_logger() load_dotenv(ENV_FILE) +PROPERTY_TYPE_LOOKUP = { + "HA1": { + "built_form": { + 'Mid Terrace': 'Mid-Terrace', + 'Semi-Detached': 'Semi-Detached', + 'End Terrace': 'End-Terrace', + 'Detached': 'Detached', + 'Enclosed Mid': 'Mid-Terrace', + 'Detached Local Connect': 'Detached', + } + }, + "HA6": { + "property_type": { + 'HOUSE': "House", + 'GROUND FLOOR FLAT': "Flat", + 'UPPER FLOOR FLAT': "Flat", + 'MAISONETTE': "Maisonette", + 'BUNGALOW': "Bungalow", + 'WARDEN BUNGALOW': "Bungalow", + 'WARDEN FLAT': "Flat", + 'EXTRACARE SCHEME': "Flat", + } + }, + "HA14": { + "property_type": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + } + }, + "HA39": { + "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, + "1st floor flat": {"property_type": "Flat", "built_form": None}, + "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"}, + "Ground floor flat": {"property_type": "Flat", "built_form": None}, + "End terrace house": {"property_type": "House", "built_form": "End-Terrace"}, + "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"}, + "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"}, + "2nd floor flat": {"property_type": "Flat", "built_form": None}, + "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"}, + "3rd floor flat": {"property_type": "Flat", "built_form": None}, + "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"}, + "Maisonette": {"property_type": "Maisonette", "built_form": None}, + "Detached house": {"property_type": "House", "built_form": "Detached"}, + "Lower ground floor flat": {"property_type": "Flat", "built_form": None}, + "Dormer bungalow": {"property_type": "Bungalow", "built_form": None}, + "Basement flat": {"property_type": "Flat", "built_form": None}, + "Cluster House": {"property_type": "House", "built_form": "Detached"}, + "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None}, + "Ground floor flat with study": {"property_type": "Flat", "built_form": None}, + "4th floor flat": {"property_type": "Flat", "built_form": None}, + "1st floor flat with study room": {"property_type": "Flat", "built_form": None}, + "2nd floor flat with study": {"property_type": "Flat", "built_form": None}, + }, + "HA107": { + "property_type": { + "HOUSE": "House", + "BUNGALOW": "Bungalow", + "GRD FLOOR FLAT": "Flat", + "FIRST FLOOR FLAT": "Flat", + "SHELTERED BUNGALOW": "Bungalow", + "MAISONETTE": "Maisonette", + "SECOND FLOOR FLAT": "Flat", + "SHELTERED FIRST FLR": "Flat", + "SHELTERED GROUND FLR": "Flat", + "GRD FLOOR BED SIT": "House" + }, + "built_form": { + "Semi Detached": "Semi-Detached", + "Mid Terrace": "Mid-Terrace", + "End Terrace": "End-Terrace", + "Detached": "Detached", + "Detatched": "Detached", + } + } +} + class DataLoader: COLUMN_CONFIG = { @@ -54,7 +132,7 @@ class DataLoader: self.data = {} self.december_figures = None - self.ha_facts_and_figures = None + self.facts_and_figures = None def create_asset_list_matching_address(self, ha_name, asset_list): @@ -929,7 +1007,77 @@ class DataLoader: ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name") ha_facts_and_figures = ha_facts_and_figures.fillna(0) - self.ha_facts_and_figures = ha_facts_and_figures + self.facts_and_figures = ha_facts_and_figures + + +def get_property_type_and_built_form(property_meta, ha_name): + if ha_name == "HA1": + property_type = property_meta["Asset Type"] + # We correct a small error + if property_type == "a": + property_type = "House" + + # Remap bedsits to flats + if property_type in ["Bedsit", "Room"]: + property_type = "Flat" + + built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None) + elif ha_name == "HA6": + property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] + built_form = property_meta["built_form"] + elif ha_name == "HA14": + if property_meta["Asset Type Description"] == "Block - Repair": + # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address + if "room" in property_meta["Address 1"].lower(): + property_type = "House" + else: + property_type = "Flat" + + else: + property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][ + property_meta["Asset Type Description"] + ] + + built_form = None + elif ha_name == "HA39": + + property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {}) + property_type = property_type_config.get("property_type", None) + built_form = property_type_config.get("built_form", None) + + if property_type is None: + # We check for the presence of room or flat + if "flat" in property_meta["matching_address"]: + property_type = "Flat" + else: + property_type = "House" + elif ha_name == "HA107": + + dwelling_style = property_meta["Dwelling Style"] + if isinstance(dwelling_style, str): + dwelling_style = dwelling_style.strip() + + property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"]) + built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None) + + if property_type is None: + if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]: + property_type = "House" + + if "flat" in property_meta["Wall Construction"].lower(): + property_type = "Flat" + + if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0): + # Hand a few specific cases + property_type = "Bungalow" + + if property_meta["Street"] == "School View": + property_type = "Bungalow" + + else: + raise NotImplementedError("Implement me") + + return property_type, built_form def get_epc_data( @@ -938,84 +1086,6 @@ def get_epc_data( if not loader.data: raise ValueError("Data not found - please run loader.load() first") - property_type_lookup = { - "ha_1": { - "built_form": { - 'Mid Terrace': 'Mid-Terrace', - 'Semi-Detached': 'Semi-Detached', - 'End Terrace': 'End-Terrace', - 'Detached': 'Detached', - 'Enclosed Mid': 'Mid-Terrace', - 'Detached Local Connect': 'Detached', - } - }, - "ha_6": { - "property_type": { - 'HOUSE': "House", - 'GROUND FLOOR FLAT': "Flat", - 'UPPER FLOOR FLAT': "Flat", - 'MAISONETTE': "Maisonette", - 'BUNGALOW': "Bungalow", - 'WARDEN BUNGALOW': "Bungalow", - 'WARDEN FLAT': "Flat", - 'EXTRACARE SCHEME': "Flat", - } - }, - "ha_14": { - "property_type": { - "House": "House", - "Flat": "Flat", - "Bungalow": "Bungalow", - "Maisonette": "Maisonette", - } - }, - "ha_39": { - "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, - "1st floor flat": {"property_type": "Flat", "built_form": None}, - "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"}, - "Ground floor flat": {"property_type": "Flat", "built_form": None}, - "End terrace house": {"property_type": "House", "built_form": "End-Terrace"}, - "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"}, - "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"}, - "2nd floor flat": {"property_type": "Flat", "built_form": None}, - "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"}, - "3rd floor flat": {"property_type": "Flat", "built_form": None}, - "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"}, - "Maisonette": {"property_type": "Maisonette", "built_form": None}, - "Detached house": {"property_type": "House", "built_form": "Detached"}, - "Lower ground floor flat": {"property_type": "Flat", "built_form": None}, - "Dormer bungalow": {"property_type": "Bungalow", "built_form": None}, - "Basement flat": {"property_type": "Flat", "built_form": None}, - "Cluster House": {"property_type": "House", "built_form": "Detached"}, - "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None}, - "Ground floor flat with study": {"property_type": "Flat", "built_form": None}, - "4th floor flat": {"property_type": "Flat", "built_form": None}, - "1st floor flat with study room": {"property_type": "Flat", "built_form": None}, - "2nd floor flat with study": {"property_type": "Flat", "built_form": None}, - }, - "ha_107": { - "property_type": { - "HOUSE": "House", - "BUNGALOW": "Bungalow", - "GRD FLOOR FLAT": "Flat", - "FIRST FLOOR FLAT": "Flat", - "SHELTERED BUNGALOW": "Bungalow", - "MAISONETTE": "Maisonette", - "SECOND FLOOR FLAT": "Flat", - "SHELTERED FIRST FLR": "Flat", - "SHELTERED GROUND FLR": "Flat", - "GRD FLOOR BED SIT": "House" - }, - "built_form": { - "Semi Detached": "Semi-Detached", - "Mid Terrace": "Mid-Terrace", - "End Terrace": "End-Terrace", - "Detached": "Detached", - "Detatched": "Detached", - } - } - } - outputs = {} for ha_name, data_assets in loader.data.items(): @@ -1049,77 +1119,15 @@ def get_epc_data( if property_meta["matching_postcode"] is None: continue - if ha_name == "ha_1": - property_type = property_meta["Asset Type"] - # We correct a small error - if property_type == "a": - property_type = "House" - - # Remap bedsits to flats - if property_type in ["Bedsit", "Room"]: - property_type = "Flat" - - built_form = property_type_lookup[ha_name]["built_form"].get(property_meta["Property Type"], None) - elif ha_name == "ha_6": - property_type = property_type_lookup[ha_name]["property_type"][property_meta["Dwelling type"]] - built_form = property_meta["built_form"] - elif ha_name == "ha_14": - if property_meta["Asset Type Description"] == "Block - Repair": - # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address - if "room" in property_meta["Address 1"].lower(): - property_type = "House" - else: - property_type = "Flat" - - else: - property_type = property_type_lookup[ha_name]["property_type"][ - property_meta["Asset Type Description"] - ] - - built_form = None - elif ha_name == "ha_39": - - property_type_config = property_type_lookup[ha_name].get(property_meta["ConstructionStyle"], {}) - property_type = property_type_config.get("property_type", None) - built_form = property_type_config.get("built_form", None) - - if property_type is None: - # We check for the presence of room or flat - if "flat" in property_meta["matching_address"]: - property_type = "Flat" - else: - property_type = "House" - elif ha_name == "ha_107": - - dwelling_style = property_meta["Dwelling Style"] - if isinstance(dwelling_style, str): - dwelling_style = dwelling_style.strip() - - property_type = property_type_lookup[ha_name]["property_type"].get(property_meta["DwellingType"]) - built_form = property_type_lookup[ha_name]["built_form"].get(dwelling_style, None) - - if property_type is None: - if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]: - property_type = "House" - - if "flat" in property_meta["Wall Construction"].lower(): - property_type = "Flat" - - if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0): - # Hand a few specific cases - property_type = "Bungalow" - - if property_meta["Street"] == "School View": - property_type = "Bungalow" - - else: - raise NotImplementedError("Implement me") + property_type, built_form = get_property_type_and_built_form( + property_meta=property_meta, ha_name=ha_name + ) searcher = SearchEpc( address1=str(property_meta["HouseNo"]), postcode=property_meta["matching_postcode"], auth_token=EPC_AUTH_TOKEN, - os_api_key=None, + os_api_key="", full_address=property_meta["matching_address"] ) searcher.ordnance_survey_client.property_type = property_type @@ -1150,9 +1158,21 @@ def get_epc_data( eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() - if (not eligibility.eco4_warmfront["eligible"]) and ( - not eligibility.gbis_warmfront - ) and consider_penultimate_epc: + # We check the conditions for checking the penultimate epc + identified_for_gbis = property_meta["ECO Eligibility"] == "gbis" + identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"] + + # condition 1 - identified for gbis and not eligible + condition_1 = ( + identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"] + ) & consider_penultimate_epc + + # condition 2 - identified for eco4 and not eligible + condition_2 = ( + identified_for_eco4 and not eligibility.eco4_warmfront["eligible"] + ) & consider_penultimate_epc + + if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]: # We check the penultimate epc eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() @@ -1161,6 +1181,10 @@ def get_epc_data( # We don't update just to make data cleaning easier if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]: + + else: + blah # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity # Loft MUST be suitable @@ -1199,6 +1223,7 @@ def get_epc_data( { "row_id": property_meta["asset_list_row_id"], "uprn": eligibility.epc["uprn"], + "is_estimated": searcher.newest_epc.get("estimated") is not None, "property_type": eligibility.epc["property-type"], "gbis_eligible": eligibility.gbis_warmfront, "eco4_eligible": eligibility.eco4_warmfront["eligible"], @@ -1219,7 +1244,6 @@ def get_epc_data( "cavity_age": cavity_age, **eligibility.walls, **eligibility.roof, - "is_estimated": searcher.newest_epc.get("estimated") is not None, "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"], "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"] } @@ -1687,38 +1711,7 @@ def analyse_ha_data(outputs, loader): writer.sheets[sheet].set_column(i, i, width) -def app(): - """ - This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. - Only HA 6 has surveys - :return: - """ - - use_cache = True - - # List all of the data in the folder - directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] - # Grab the December HA figures filepath - december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - - priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] - # Filter down the directories to only the priority HAs - directories = [d for d in directories if d.split("/")[2] in priority_has] - - loader = DataLoader(directories, december_figures_filepath, use_cache) - loader.load() - loader.ha_facts_and_figures() - - # TODO: We probably need to make sure that we have all of the columns that we need - - # We load in the additional data required to perform the analysis - - cleaned = read_from_s3( - s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" - ) - cleaned = msgpack.unpackb(cleaned, raw=False) - +def patch_cleaned(cleaned): # Patch to handle the a missing description cleaned["floor-description"].extend( [ @@ -1762,16 +1755,57 @@ def app(): x["another_property_below"] = True x["thermal_transmittance"] = 0 + return cleaned + + +def app(): + """ + This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. + Only HA 6 has surveys + :return: + """ + + # Determines if we want to use the cached data in s3 + use_cache = True + # Determines if we want to perform the data pull + pull_data = True + + # List all of the data in the folder + directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] + # Grab the December HA figures filepath + december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" + + priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] + # Filter down the directories to only the priority HAs + directories = [d for d in directories if d.split("/")[2] in priority_has] + + loader = DataLoader(directories, december_figures_filepath, use_cache) + loader.load() + loader.ha_facts_and_figures() + + # We load in the additional data required to perform the analysis + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + cleaned = patch_cleaned(cleaned) + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) - created_at = datetime.now().isoformat() photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") outputs = get_epc_data( - loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False + loader=loader, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds, + pull_data=pull_data ) # for ha_name, datasets in outputs.items(): From 807ce14790600dce8a810847f47bc216bcddf6b3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 19:09:19 +0000 Subject: [PATCH 019/248] updating the code to do eligibility --- .../ha_15_32/ha_analysis_batch_3.py | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 5dd9b6e1..3d0964c6 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1164,15 +1164,33 @@ def get_epc_data( # condition 1 - identified for gbis and not eligible condition_1 = ( - identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"] - ) & consider_penultimate_epc + identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[ + "eligible"] + ) & consider_penultimate_epc # condition 2 - identified for eco4 and not eligible - condition_2 = ( - identified_for_eco4 and not eligibility.eco4_warmfront["eligible"] - ) & consider_penultimate_epc + condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[ + "eligible"]) & consider_penultimate_epc - if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]: + # successfully identigied gbis + condition_3 = ( + identified_for_gbis and (eligibility.gbis_warmfront or eligibility.eco4_warmfront["eligible"]) + ) + + # Nothing identified + condition_4 = ( + not identified_for_gbis and not identified_for_eco4 and not eligibility.gbis_warmfront and not + eligibility.eco4_warmfront["eligible"] + ) + + # Not identified but seemingly eligible for eco4 or gbis + condition_5 = ( + not identified_for_gbis and not identified_for_eco4 and ( + eligibility.eco4_warmfront["eligible"] or eligibility.gbis_warmfront + ) + ) + + if condition_1 or condition_2: # We check the penultimate epc eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() @@ -1181,10 +1199,11 @@ def get_epc_data( # We don't update just to make data cleaning easier if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] - elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]: - + elif condition_3 or condition_4 or condition_5: + # If we have successfully identified for gbis, we don't need to check the penultimate epc + pass else: - blah + NotImplementedError("Implement me") # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity # Loft MUST be suitable @@ -1229,10 +1248,7 @@ def get_epc_data( "eco4_eligible": eligibility.eco4_warmfront["eligible"], "eco4_message": eligibility.eco4_warmfront["message"], "sap": float(eligibility.epc["current-energy-efficiency"]), - "gbis_eligible_future": eligibility.gbis["eligible"], - "gbis_eligible_future_message": eligibility.gbis["message"], - "eco4_eligible_future": eligibility.eco4["eligible"], - "eco4_eligible_future_message": eligibility.eco4["message"], + # Property components "roof": eligibility.roof["clean_description"], "walls": eligibility.walls["clean_description"], From 69dcc73363c43d12076b887707db802384046e07 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 19:18:58 +0000 Subject: [PATCH 020/248] deugging null lodgement-date --- backend/SearchEpc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 4f6fd33d..4a3f371a 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -609,7 +609,11 @@ class SearchEpc: # Insert an estimated lodgement datetime, with a weighted average estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data) # Extract logement date - estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d") + # It is possible that there is still no lodgement date, so we need to handle this + if pd.isnull(estimated_epc["lodgement-datetime"]): + estimated_epc["lodgement-date"] = None + else: + estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d") estimated_epc["postcode"] = self.postcode estimated_epc["uprn"] = self.uprn From b80ffda392e0601f08dd376cfaacba73e733fc9c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 19:29:46 +0000 Subject: [PATCH 021/248] updating eligibility pipeline to factor in ciga --- .../ha_15_32/ha_analysis_batch_3.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3d0964c6..ecbb4e0a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1159,8 +1159,11 @@ def get_epc_data( eligibility.check_eco4_warmfront() # We check the conditions for checking the penultimate epc - identified_for_gbis = property_meta["ECO Eligibility"] == "gbis" + identified_for_gbis = property_meta["ECO Eligibility"] in ["gbis"] identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"] + subject_to_ciga = property_meta["ECO Eligibility"] in [ + "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga" + ] # condition 1 - identified for gbis and not eligible condition_1 = ( @@ -1179,8 +1182,11 @@ def get_epc_data( # Nothing identified condition_4 = ( - not identified_for_gbis and not identified_for_eco4 and not eligibility.gbis_warmfront and not - eligibility.eco4_warmfront["eligible"] + not identified_for_gbis + and not identified_for_eco4 + and not eligibility.gbis_warmfront + and not subject_to_ciga + and not eligibility.eco4_warmfront["eligible"] ) # Not identified but seemingly eligible for eco4 or gbis @@ -1190,6 +1196,10 @@ def get_epc_data( ) ) + condition_6 = ( + subject_to_ciga and not eligibility.eco4_warmfront["eligible"] + ) + if condition_1 or condition_2: # We check the penultimate epc eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) @@ -1199,8 +1209,7 @@ def get_epc_data( # We don't update just to make data cleaning easier if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] - elif condition_3 or condition_4 or condition_5: - # If we have successfully identified for gbis, we don't need to check the penultimate epc + elif condition_3 or condition_4 or condition_5 or condition_6: pass else: NotImplementedError("Implement me") From 281c6f626c833a482a199ba120e1b0e8b1869cf1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 23:23:29 +0000 Subject: [PATCH 022/248] working on eligibility --- backend/Property.py | 3 +- etl/eligibility/Eligibility.py | 90 ++++++++-- etl/eligibility/ha_15_32/app.py | 18 +- .../ha_15_32/ha_analysis_batch_3.py | 156 +++++++++--------- 4 files changed, 167 insertions(+), 100 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 4a55e504..f86e33dc 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -147,7 +147,8 @@ class Property: # self.base_difference_record.df def adjust_difference_record_with_recommendations( - self, property_recommendations, + self, + property_recommendations, property_representative_recommendations ): """ diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index b09d2df5..bda34923 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -145,6 +145,7 @@ class Eligibility: "reason": None, "thickness_classification": thickness_classification } + return # Insulation is already thick enough self.loft = { @@ -164,8 +165,10 @@ class Eligibility: """ is_cavity = self.walls["is_cavity_wall"] - is_empty = (not self.walls["is_filled_cavity"]) or ( + is_empty = (not self.walls["is_filled_cavity"]) + is_as_built = ( self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"] + and self.walls["is_assumed"] ) is_partial_filled = "partial" in self.walls["clean_description"].lower() # We look for potentially under performing cavities - anything that is assumed, as built and insulated @@ -175,6 +178,7 @@ class Eligibility: is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled) is_partial_filled_cavity = is_cavity and is_partial_filled + is_assumed_filled_cavity = is_cavity and is_as_built is_underperforming_cavity = is_cavity and is_underperforming # Check if it has internal or external wall insulation @@ -195,6 +199,13 @@ class Eligibility: } return + if is_assumed_filled_cavity: + self.cavity = { + "suitability": True, + "type": "as built assumed", + } + return + if is_partial_filled_cavity: self.cavity = { "suitability": True, @@ -345,7 +356,7 @@ class Eligibility: int(self.epc["current-energy-efficiency"]) <= 68 ) - def check_eco4_warmfront(self, post_retrofit_sap=None): + def check_eco4_warmfront(self): """ This funciton will check if the property is eligible for funding under the ECO4 scheme @@ -377,49 +388,100 @@ class Eligibility: self.cavity_insulation() self.loft_insulation() - # make sure conditions 2 and 3 are true - is_eligible = self.cavity["suitability"] & self.loft["suitability"] - - if current_sap >= 69: + # Case 1: No conditions meet + if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55: self.eco4_warmfront = { "eligible": False, - "message": "SAP too high", + "strict": False, + "message": "All conditions fail", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - if not is_eligible and current_sap >= 55: + # Case 2 - perfect match + if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55): self.eco4_warmfront = { - "eligible": False, - "message": "failed fabric and SAP check", + "eligible": True, + "strict": True, + "message": "Perfect suitability", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - if not is_eligible and current_sap < 55: + # Case 2.5 - near perfect match - but we would not recommend this using the model + if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55): + self.eco4_warmfront = { + "eligible": True, + "strict": True, + "message": "Perfect suitability", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + # Case 3 - cavity is suitable, loft is not, sap is good + if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55): + self.eco4_warmfront = { + "eligible": True, + "strict": False, + "message": "Meets cavity and sap", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + # Case 4 - cavity is not suitable, loft is, sap is not - we say this is not elifible + if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55): self.eco4_warmfront = { "eligible": False, + "strict": False, "message": "failed fabric check", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - if is_eligible and current_sap >= 55: + # Case 5 - cavity and loft suitable, sap too high + if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55): self.eco4_warmfront = { "eligible": True, + "strict": False, "message": "Meets fabric, fails SAP check", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - if is_eligible and current_sap < 55: + # Case 6 - meets just cavity + if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55): self.eco4_warmfront = { "eligible": True, - "message": "Meets fabric and SAP check", + "strict": False, + "message": "Meets just cavity", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + # Case 7 - fails cavity, loft but meets sap + if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55): + self.eco4_warmfront = { + "eligible": False, + "strict": False, + "message": "Fails cavity nd lodt, meets SAP", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + # Case 8 - fails cavity, meets loft, fails sap + if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55): + self.eco4_warmfront = { + "eligible": False, + "strict": False, + "message": "Fails cavity, meets loft, fails SAP", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index a68bf272..378a0e83 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -387,17 +387,19 @@ def prepare_model_data_row( } simulations = [ - [cavity_simulation], - [loft_simulation] + cavity_simulation, + loft_simulation ] - p.adjust_difference_record_with_recommendations(simulations) + recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy() + scoring_dict = p.create_recommendation_scoring_data( + property_id=p.id, + recommendation_record=recommendation_record, + recommendations=simulations, + primary_recommendation_id=cavity_simulation["recommendation_id"] + ) - # Make sure we definitely have the correct data - cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0] - loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0] - - return [cavity_scoring, loft_scoring] + return [scoring_dict] def get_ha_32data(ha_data, cleaned, cleaning_data, created_at): diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ecbb4e0a..239fce65 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1114,7 +1114,7 @@ def get_epc_data( results = [] scoring_data = [] nodata = [] - for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)): + for index, property_meta in tqdm(eco4.iterrows(), total=len(eco4)): if property_meta["matching_postcode"] is None: continue @@ -1226,10 +1226,6 @@ def get_epc_data( # We check the age of the cavity and if it's particularly old, we flag it cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) - # Full checks - eligibility.check_gbis() - eligibility.check_eco4() - if eligibility.eco4_warmfront["eligible"]: if eligibility.epc["uprn"] == "": eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1]) @@ -1256,8 +1252,8 @@ def get_epc_data( "gbis_eligible": eligibility.gbis_warmfront, "eco4_eligible": eligibility.eco4_warmfront["eligible"], "eco4_message": eligibility.eco4_warmfront["message"], + "eco4_strict": eligibility.eco4_warmfront["strict"], "sap": float(eligibility.epc["current-energy-efficiency"]), - # Property components "roof": eligibility.roof["clean_description"], "walls": eligibility.walls["clean_description"], @@ -1267,91 +1263,97 @@ def get_epc_data( "date_epc": eligibility.epc["lodgement-date"], "loft_thickness": eligibility.roof["insulation_thickness"], "cavity_age": cavity_age, - **eligibility.walls, - **eligibility.roof, "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"], "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"] } ) - scoring_df = pd.DataFrame(scoring_data) - scoring_df = scoring_df.drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] - ) - - model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at) - - # scoring_df["is_community"].value_counts() - # scoring_df[scoring_df["is_community"] == "Unknown"] - # property_meta = asset_list[asset_list["asset_list_row_id"] == "ha_67238"].squeeze() - - all_predictions = model_api.predict_all( - df=scoring_df, - bucket="retrofit-data-dev", - prediction_buckets={ - "sap_change_predictions": "retrofit-sap-predictions-dev", - "heat_demand_predictions": "retrofit-heat-predictions-dev", - "carbon_change_predictions": "retrofit-carbon-predictions-dev" - } - ) - results_df = pd.DataFrame(results) + scoring_df = pd.DataFrame(scoring_data) + results_df["post_install_sap"] = None + results_df["eligibility_classification"] = None - predictions = all_predictions["sap_change_predictions"].copy() + eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"] + z = results_df[results_df["row_id"].isin(eco4["asset_list_row_id"])] + z["walls"].value_counts() + z1 = z[z["walls"] == "Cavity wall, as built, no insulation"] + k = z1[z1["roof"] == "Pitched, 100 mm loft insulation"] + property_meta = asset_list[asset_list["asset_list_row_id"] == k["row_id"].values[0]].squeeze() + z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts() + z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts() - predictions = predictions.rename(columns={"property_id": "row_id"}).merge( - results_df[["row_id", "sap"]], how="left", on="row_id" - ) - predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] - predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() + if not scoring_df.empty: + scoring_df = scoring_df.drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] + ) - results_df = results_df.merge( - predictions[["sap_uplift", "row_id"]], - how="left", - on="row_id" - ) - results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at) - eligibility_assessment = [] - for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): - # The upgrade requirements are dependent on the current SAP - - # If the property is an F or G, it only needs to upgrade to an % - if row["sap"] <= 38: - if row["post_install_sap"] >= 57: - eligibility_classification = "highest confidence" - elif row["post_install_sap"] >= 55: - eligibility_classification = "high confidence" - elif row["post_install_sap"] >= 53: - eligibility_classification = "medium confidence" - else: - eligibility_classification = "unlikely" - else: - - if row["post_install_sap"] >= 71: - eligibility_classification = "highest confidence" - elif row["post_install_sap"] >= 69: - eligibility_classification = "high confidence" - elif row["post_install_sap"] >= 67: - eligibility_classification = "medium confidence" - else: - eligibility_classification = "unlikely" - - eligibility_assessment.append( - { - "row_id": row["row_id"], - "eligibility_classification": eligibility_classification + all_predictions = model_api.predict_all( + df=scoring_df, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + "heat_demand_predictions": "retrofit-heat-predictions-dev", + "carbon_change_predictions": "retrofit-carbon-predictions-dev" } ) - eligibility_assessment = pd.DataFrame(eligibility_assessment) + predictions = all_predictions["sap_change_predictions"].copy() - results_df = results_df.merge( - eligibility_assessment, how="left", on="row_id" - ) + predictions = predictions.rename(columns={"property_id": "row_id"}).merge( + results_df[["row_id", "sap"]], how="left", on="row_id" + ) + predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] + predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() + + results_df = results_df.merge( + predictions[["sap_uplift", "row_id"]], + how="left", + on="row_id" + ) + results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + + eligibility_assessment = [] + for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): + # The upgrade requirements are dependent on the current SAP + + # If the property is an F or G, it only needs to upgrade to an % + if row["sap"] <= 38: + if row["post_install_sap"] >= 57: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 55: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 53: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + else: + + if row["post_install_sap"] >= 71: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 69: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 67: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + + eligibility_assessment.append( + { + "row_id": row["row_id"], + "eligibility_classification": eligibility_classification + } + ) + + eligibility_assessment = pd.DataFrame(eligibility_assessment) + + results_df = results_df.merge( + eligibility_assessment, how="left", on="row_id" + ) # We store the results in S3 as a pickle save_pickle_to_s3( From f4d27aa68dea5595037d55e7ad8c54cc9d7967ad Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 23:30:06 +0000 Subject: [PATCH 023/248] fixing eligibility --- etl/eligibility/Eligibility.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index bda34923..15e3158f 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -421,8 +421,19 @@ class Eligibility: } return + # Case 3 - cavity is suitable, loft is within 150mm, sap is good + if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55): + self.eco4_warmfront = { + "eligible": True, + "strict": False, + "message": "Meets cavity, loft borderline, meets sap", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + # Case 3 - cavity is suitable, loft is not, sap is good - if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55): + if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55): self.eco4_warmfront = { "eligible": True, "strict": False, @@ -444,7 +455,7 @@ class Eligibility: return # Case 5 - cavity and loft suitable, sap too high - if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55): + if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55): self.eco4_warmfront = { "eligible": True, "strict": False, @@ -470,7 +481,7 @@ class Eligibility: self.eco4_warmfront = { "eligible": False, "strict": False, - "message": "Fails cavity nd lodt, meets SAP", + "message": "Fails cavity and loft, meets SAP", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } From 97ce8dc32ea0edd3d24ecefe942a0eb4e8df418e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 26 Feb 2024 23:36:45 +0000 Subject: [PATCH 024/248] fixing eligibility --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 239fce65..1ba75e2b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1114,7 +1114,7 @@ def get_epc_data( results = [] scoring_data = [] nodata = [] - for index, property_meta in tqdm(eco4.iterrows(), total=len(eco4)): + for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)): if property_meta["matching_postcode"] is None: continue @@ -1218,10 +1218,7 @@ def get_epc_data( # Loft MUST be suitable cavity_age = None if ( - eligibility.walls["is_cavity_wall"] and - eligibility.walls["is_filled_cavity"] and - eligibility.loft["suitability"] and - eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age" + identified_for_eco4 and not eligibility.eco4_warmfront["eligible"] ): # We check the age of the cavity and if it's particularly old, we flag it cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) From 0fbf00451291a09349c0bdeeb67bbc80bd4dc9bc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 10:20:55 +0000 Subject: [PATCH 025/248] Expanding gbis eligibiity checks --- etl/eligibility/Eligibility.py | 44 +++++++++++++++++-- .../ha_15_32/ha_analysis_batch_3.py | 20 +++++---- etl/epc/Dataset.py | 16 +++---- 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index 15e3158f..f7a5ed98 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -352,9 +352,41 @@ class Eligibility: # Check if the property is suitable for cavity wall self.cavity_insulation() - self.gbis_warmfront = (self.cavity["suitability"]) and ( - int(self.epc["current-energy-efficiency"]) <= 68 - ) + current_sap = int(self.epc["current-energy-efficiency"]) + # We have a strict suitability check and a non-strict check + + # Perfect strictness + if (self.cavity["type"] == "empty") and (current_sap < 69): + self.gbis_warmfront = { + "eligible": True, + "strict": True, + "message": "Perfect suitability", + } + return + + # Near perfect + if self.cavity["suitability"] and (current_sap < 55): + self.gbis_warmfront = { + "eligible": True, + "strict": True, + "message": "Near perfect suitability", + } + return + + # Suitable cavity, but high sap + if self.cavity["suitability"] and (current_sap >= 55): + self.gbis_warmfront = { + "eligible": True, + "strict": False, + "message": "Meets cavity, fails SAP check", + } + return + + self.gbis_warmfront = { + "eligible": False, + "strict": False, + "message": "All conditions fail", + } def check_eco4_warmfront(self): """ @@ -388,6 +420,10 @@ class Eligibility: self.cavity_insulation() self.loft_insulation() + # We put in a placeholder when the roof is not a loft + if self.loft["reason"] == "roof not loft": + self.loft["thickness"] = 999 + # Case 1: No conditions meet if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55: self.eco4_warmfront = { @@ -415,7 +451,7 @@ class Eligibility: self.eco4_warmfront = { "eligible": True, "strict": True, - "message": "Perfect suitability", + "message": "Near perfect suitability", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1ba75e2b..28efadd0 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1270,15 +1270,6 @@ def get_epc_data( results_df["post_install_sap"] = None results_df["eligibility_classification"] = None - eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"] - z = results_df[results_df["row_id"].isin(eco4["asset_list_row_id"])] - z["walls"].value_counts() - z1 = z[z["walls"] == "Cavity wall, as built, no insulation"] - k = z1[z1["roof"] == "Pitched, 100 mm loft insulation"] - property_meta = asset_list[asset_list["asset_list_row_id"] == k["row_id"].values[0]].squeeze() - z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts() - z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts() - if not scoring_df.empty: scoring_df = scoring_df.drop( columns=[ @@ -1763,6 +1754,17 @@ def patch_cleaned(cleaned): ] ) + cleaned["roof-description"].extend( + [ + {'original_description': 'Pitched, 300+mm loft insulation', + 'clean_description': 'Pitched, 300+ mm loft insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_pitched': True, 'is_roof_room': False, 'is_loft': True, + 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': '300+' + } + ] + ) + # Patch mainheatcont-description cleaned["mainheatcont-description"].extend( [ diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index dac829e2..7040d66c 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ @@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"] ) - ] + ] elif component == "floor": expanded_df = expanded_df[ (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) @@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"] ) - ] + ] elif component == "roof": expanded_df = expanded_df[ (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) @@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset): expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"] ) - ] + ] return expanded_df From 7b080094fdf08daf720ac01c10bfad380a917062 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 11:02:12 +0000 Subject: [PATCH 026/248] created distributed scoring for prediction --- .../ha_15_32/ha_analysis_batch_3.py | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 28efadd0..3dc4d45f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1166,10 +1166,9 @@ def get_epc_data( ] # condition 1 - identified for gbis and not eligible - condition_1 = ( - identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[ - "eligible"] - ) & consider_penultimate_epc + condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront + and not eligibility.eco4_warmfront["eligible"] + ) & consider_penultimate_epc # condition 2 - identified for eco4 and not eligible condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[ @@ -1246,10 +1245,12 @@ def get_epc_data( "uprn": eligibility.epc["uprn"], "is_estimated": searcher.newest_epc.get("estimated") is not None, "property_type": eligibility.epc["property-type"], - "gbis_eligible": eligibility.gbis_warmfront, "eco4_eligible": eligibility.eco4_warmfront["eligible"], "eco4_message": eligibility.eco4_warmfront["message"], "eco4_strict": eligibility.eco4_warmfront["strict"], + "gbis_eligible": eligibility.gbis_warmfront["eligible"], + "gbis_message": eligibility.gbis_warmfront["message"], + "gbis_strict": eligibility.gbis_warmfront["strict"], "sap": float(eligibility.epc["current-energy-efficiency"]), # Property components "roof": eligibility.roof["clean_description"], @@ -1279,24 +1280,32 @@ def get_epc_data( ) model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] - all_predictions = model_api.predict_all( - df=scoring_df, - bucket="retrofit-data-dev", - prediction_buckets={ - "sap_change_predictions": "retrofit-sap-predictions-dev", - "heat_demand_predictions": "retrofit-heat-predictions-dev", - "carbon_change_predictions": "retrofit-carbon-predictions-dev" - } - ) + scoring_df["id"] = scoring_df["id"] + "phase=0" + # We split up the scoring_df and score + predictions = [] + to_loop_over = range(0, scoring_df.shape[0], 400) + for chunk in tqdm(to_loop_over, total=len(to_loop_over)): + predictions_dict = model_api.predict_all( + df=scoring_df.iloc[chunk:chunk + 400], + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) - predictions = all_predictions["sap_change_predictions"].copy() + predictions.append(predictions_dict["sap_change_predictions"]) + + predictions = pd.concat(predictions) + predictions_size = predictions.shape[0] predictions = predictions.rename(columns={"property_id": "row_id"}).merge( results_df[["row_id", "sap"]], how="left", on="row_id" ) + if predictions.shape[0] != predictions_size: + raise ValueError("Predictions size has changed") predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] - predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() results_df = results_df.merge( predictions[["sap_uplift", "row_id"]], @@ -1339,9 +1348,12 @@ def get_epc_data( eligibility_assessment = pd.DataFrame(eligibility_assessment) + # Make sure the results haven't changed in size results_df = results_df.merge( eligibility_assessment, how="left", on="row_id" ) + if results_df.shape[0] != len(results): + raise ValueError("results has changed size") # We store the results in S3 as a pickle save_pickle_to_s3( @@ -1809,6 +1821,8 @@ def app(): loader.load() loader.ha_facts_and_figures() + loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False) + # We load in the additional data required to perform the analysis cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", From 3ef346b248ed89e04a08d07a0231db987809521b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 13:12:54 +0000 Subject: [PATCH 027/248] patching roof description in cleaned further --- .../ha_15_32/ha_analysis_batch_3.py | 60 ++++++++++++++++++- etl/epc/Dataset.py | 28 +++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3dc4d45f..e261710e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1755,7 +1755,16 @@ def patch_cleaned(cleaned): ] ) - # We treat unknown loft insulation as no insulation + cleaned["roof-description"].extend( + [ + {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation', + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'} + ] + ) + cleaned["roof-description"].extend( [ {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation', @@ -1777,6 +1786,55 @@ def patch_cleaned(cleaned): ] ) + thermal_transmittance_values = list(np.arange(0, 2, 0.01)) + for ttv in thermal_transmittance_values: + ttv_roundeded = round(ttv, 2) + # We look for an instance of that thermal transmittance value + rec = [ + x for x in cleaned["roof-description"] if + (x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"] + ] + + if rec: + continue + else: + # We patch the record + cleaned["roof-description"].extend( + [{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K', + 'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k', + 'thermal_transmittance': ttv_roundeded, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, + 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}] + ) + + # We also patch a funny unit value we found + for ttv in thermal_transmittance_values: + ttv_rounded = round(ttv, 2) + # We look for an instance of that thermal transmittance value + rec = [ + x for x in cleaned["roof-description"] if + (x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"] + and x["thermal_transmittance_unit"] == "w/m?K" + ] + + if rec: + continue + else: + # We patch the record + ttv_string = str(ttv_rounded) + if len(ttv_string) == 3: + ttv_string = f"{ttv_string}0" + + cleaned["roof-description"].extend( + [{'original_description': f'Average thermal transmittance {ttv_string} W/m?K', + 'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k', + 'thermal_transmittance': ttv_rounded, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, + 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}] + ) + # Patch mainheatcont-description cleaned["mainheatcont-description"].extend( [ diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 7040d66c..cf241747 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -658,6 +658,34 @@ class TrainingDataset(BaseDataset): components_to_expand = cols_to_drop.keys() + for comp in list(components_to_expand): + if comp == "main-fuel": + cleaned_key = "main-fuel" + left_on_starting = "main_fuel_starting" + left_on_ending = "main_fuel_ending" + original_cols = ["main_fuel_starting", "main_fuel_ending"] + else: + cleaned_key = f"{comp}-description" + left_on_starting = f"{comp}_description_starting" + left_on_ending = f"{comp}_description_ending" + original_cols = [ + f"{comp}_description_starting", + f"{comp}_description_ending", + ] + df = pd.DataFrame(cleaned_lookup[cleaned_key]) + # Check for the existence + filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]] + filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]] + if filtered_1.empty: + print(comp) + print(self.df[left_on_starting].values[0]) + + if filtered_2.empty: + print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup") + + z = pd.DataFrame(cleaned_lookup["roof-description"]) + z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"] + for component in components_to_expand: # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel": From 730ad0fd7144b2b5e86d98b8c3ef4e5d71ccd0cb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 13:13:28 +0000 Subject: [PATCH 028/248] removing temp code --- etl/epc/Dataset.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index cf241747..7040d66c 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -658,34 +658,6 @@ class TrainingDataset(BaseDataset): components_to_expand = cols_to_drop.keys() - for comp in list(components_to_expand): - if comp == "main-fuel": - cleaned_key = "main-fuel" - left_on_starting = "main_fuel_starting" - left_on_ending = "main_fuel_ending" - original_cols = ["main_fuel_starting", "main_fuel_ending"] - else: - cleaned_key = f"{comp}-description" - left_on_starting = f"{comp}_description_starting" - left_on_ending = f"{comp}_description_ending" - original_cols = [ - f"{comp}_description_starting", - f"{comp}_description_ending", - ] - df = pd.DataFrame(cleaned_lookup[cleaned_key]) - # Check for the existence - filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]] - filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]] - if filtered_1.empty: - print(comp) - print(self.df[left_on_starting].values[0]) - - if filtered_2.empty: - print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup") - - z = pd.DataFrame(cleaned_lookup["roof-description"]) - z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"] - for component in components_to_expand: # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel": From d573c4d8a0ae911edd0e2f181eceb4087e3e78e4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 15:15:05 +0000 Subject: [PATCH 029/248] added try except mechanism --- .../ha_15_32/ha_analysis_batch_3.py | 35 ++++++++++++------- etl/epc/Record.py | 32 ++++++++--------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e261710e..da484daa 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1089,6 +1089,9 @@ def get_epc_data( outputs = {} for ha_name, data_assets in loader.data.items(): + if ha_name == "HA39": + continue + if not pull_data: # Then we retrieve the data from S3 processed_ha_results = read_pickle_from_s3( @@ -1114,6 +1117,7 @@ def get_epc_data( results = [] scoring_data = [] nodata = [] + failed_model_rows = [] for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)): if property_meta["matching_postcode"] is None: @@ -1225,19 +1229,24 @@ def get_epc_data( if eligibility.eco4_warmfront["eligible"]: if eligibility.epc["uprn"] == "": eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1]) - - scoring_dictionary = prepare_model_data_row( - property_id=property_meta["asset_list_row_id"], - modelling_epc=eligibility.epc, - cleaned=cleaned, - cleaning_data=cleaning_data, - created_at=created_at, - old_data=older_epcs, - full_sap_epc=full_sap_epc, - photo_supply_lookup=photo_supply_lookup, - floor_area_decile_thresholds=floor_area_decile_thresholds - ) - scoring_data.extend(scoring_dictionary) + try: + scoring_dictionary = prepare_model_data_row( + property_id=property_meta["asset_list_row_id"], + modelling_epc=eligibility.epc, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + old_data=older_epcs, + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + scoring_data.extend(scoring_dictionary) + except Exception as e: + # If we fail, we just keep a record of it + failed_model_rows.append( + property_meta["asset_list_row_id"] + ) results.append( { diff --git a/etl/epc/Record.py b/etl/epc/Record.py index c793716f..e74330a2 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -725,26 +725,26 @@ class EPCRecord: if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES: if self.old_data: # Take the most recent - max_datetime = max( - [ - old_record["lodgement-datetime"] - for old_record in self.old_data - if old_record["construction-age-band"] - not in DATA_ANOMALY_MATCHES - ] - ) - - most_recent = [ - old_record + old_age_bands = [ + old_record["lodgement-datetime"] for old_record in self.old_data - if old_record["lodgement-datetime"] == max_datetime + if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES ] - self.prepared_epc["construction-age-band"] = ( - EPCDataProcessor.clean_construction_age_band( - most_recent[0]["construction-age-band"] + if old_age_bands: + max_datetime = max(old_age_bands) + + most_recent = [ + old_record + for old_record in self.old_data + if old_record["lodgement-datetime"] == max_datetime + ] + + self.prepared_epc["construction-age-band"] = ( + EPCDataProcessor.clean_construction_age_band( + most_recent[0]["construction-age-band"] + ) ) - ) self.construction_age_band = self.prepared_epc["construction-age-band"] self.age_band = england_wales_age_band_lookup.get(self.construction_age_band) From b26e44b465e5c832a65b5bd09767f1015c2dfc1a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 15:45:33 +0000 Subject: [PATCH 030/248] Extending to HA 7 --- .../ha_15_32/ha_analysis_batch_3.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index da484daa..2fb26e73 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -48,6 +48,10 @@ PROPERTY_TYPE_LOOKUP = { 'EXTRACARE SCHEME': "Flat", } }, + "HA7": { + "property_type": {}, + "built_form": {} + }, "HA14": { "property_type": { "House": "House", @@ -143,6 +147,13 @@ class DataLoader: asset_list["matching_postcode"] = asset_list[ self.COLUMN_CONFIG[ha_name]["postcode"] ].str.lower().str.strip() + elif ha_name == "HA7": + # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode + asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() + ", " + \ + asset_list["Address2"].str.lower().str.strip() + ", " + \ + asset_list["Address3"].str.lower().str.strip() + ", " + \ + asset_list["Postcode"].str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() elif ha_name == "HA14": # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \ @@ -241,6 +252,8 @@ class DataLoader: def get_asset_sheetname(workbook): if "Asset List" in workbook.sheetnames: return "Asset List" + elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames: + return "Asset" else: return "Assets" @@ -311,6 +324,8 @@ class DataLoader: survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]]) # Remove columns that are None survey_list = survey_list.loc[:, survey_list.columns.notnull()] + # Remove rows that are completely empty + survey_list = survey_list.loc[survey_list.loc[:, survey_list.columns].notnull().any(axis=1)] survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))] # Perform survey list merge @@ -328,6 +343,8 @@ class DataLoader: ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) # Remove columns that are None ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] + # Remove rows that are completely None + ciga_list = ciga_list.loc[ciga_list.loc[:, ciga_list.columns].notnull().any(axis=1)] # Perform ciga list merge if not ciga_list.empty: # Remove rows with missing postcode which happens in a small number of cases @@ -1880,7 +1897,7 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] + priority_has = ["HA1", "HA6", "HA7", "HA14", "HA39", "HA107"] # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From eb216e55d39817a6d7bdd6c582c6da6826050ac9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 16:45:37 +0000 Subject: [PATCH 031/248] Handling missing dates in SearchEpc class --- backend/SearchEpc.py | 15 ++++++++++----- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 4a3f371a..3d2df9fb 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -30,7 +30,7 @@ vartypes = { 'environment-impact-potential': "Int64", 'glazed-type': 'str', 'heating-cost-current': 'float', - 'address3': 'str', + # 'address3': 'str', 'mainheatcont-description': 'str', 'sheating-energy-eff': 'str', 'property-type': 'str', @@ -40,7 +40,7 @@ vartypes = { 'mechanical-ventilation': 'str', 'hot-water-cost-current': 'str', 'county': 'str', - 'postcode': 'str', + # 'postcode': 'str', 'solar-water-heating-flag': 'str', 'constituency': 'str', 'co2-emissions-potential': 'float', @@ -55,7 +55,7 @@ vartypes = { # 'inspection-date': str, 'mains-gas-flag': 'str', 'co2-emiss-curr-per-floor-area': 'float', - 'address1': 'str', + # 'address1': 'str', 'heat-loss-corridor': 'str', 'flat-storey-count': "Int64", 'constituency-label': 'str', @@ -67,7 +67,7 @@ vartypes = { 'roof-description': 'str', 'floor-energy-eff': 'str', 'number-habitable-rooms': 'float', - 'address2': 'str', + # 'address2': 'str', 'hot-water-env-eff': 'str', 'posttown': 'str', 'mainheatc-energy-eff': 'str', @@ -98,7 +98,7 @@ vartypes = { # 'lodgement-date', 'extension-count': "Int64", 'mainheatc-env-eff': 'str', - 'lmk-key': 'str', + # 'lmk-key': 'str', 'wind-turbine-count': "Int64", 'tenure': 'str', 'floor-level': 'str', @@ -575,6 +575,11 @@ class SearchEpc: property_type=property_type ) + # If we have missing lodgment date, we fill it with inspection-date + epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"]) + # If we still have missing dates, we set it to the mean of the non NA dates + epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean()) + # For each attribute, we need to determine the datatype and use an appropriate method # to estimate. estimated_epc = {} diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 2fb26e73..a8f0bfa9 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1135,6 +1135,7 @@ def get_epc_data( scoring_data = [] nodata = [] failed_model_rows = [] + # Failed at index 13691 for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)): if property_meta["matching_postcode"] is None: From 2a4d16162abc8bcda788950d44a0762148e8904d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 18:01:29 +0000 Subject: [PATCH 032/248] Added ha7 --- .../ha_15_32/ha_analysis_batch_3.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a8f0bfa9..889ae776 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -49,8 +49,19 @@ PROPERTY_TYPE_LOOKUP = { } }, "HA7": { - "property_type": {}, - "built_form": {} + "property_type": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + }, + "built_form": { + "Semi Detached": "Semi-Detached", + "Mid Terrace": "Mid-Terrace", + "End Terrace": "End-Terrace", + "Detached": "Detached", + "End Terraced": "End-Terrace", + } }, "HA14": { "property_type": { @@ -1042,6 +1053,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA6": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] built_form = property_meta["built_form"] + elif ha_name == "HA7": + property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Archetype"]] + built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"][property_meta["Property Type"]] elif ha_name == "HA14": if property_meta["Asset Type Description"] == "Block - Repair": # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address @@ -1106,9 +1120,6 @@ def get_epc_data( outputs = {} for ha_name, data_assets in loader.data.items(): - if ha_name == "HA39": - continue - if not pull_data: # Then we retrieve the data from S3 processed_ha_results = read_pickle_from_s3( @@ -1135,7 +1146,6 @@ def get_epc_data( scoring_data = [] nodata = [] failed_model_rows = [] - # Failed at index 13691 for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)): if property_meta["matching_postcode"] is None: @@ -1906,8 +1916,6 @@ def app(): loader.load() loader.ha_facts_and_figures() - loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False) - # We load in the additional data required to perform the analysis cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", From 9ca6c179bca70cfffd34da4e278e144ff8263e24 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 18:34:49 +0000 Subject: [PATCH 033/248] Adding HA16 --- .../ha_15_32/ha_analysis_batch_3.py | 139 +++++++++++++++++- 1 file changed, 135 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 889ae776..a707cfa5 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -128,6 +128,10 @@ class DataLoader: "HA6": { "address": "propertyaddress", "postcode": "address" # The 'address' column actually contains postcode + }, + "HA16": { + "address": "Address", + "postcode": "Postcode" } } @@ -135,9 +139,10 @@ class DataLoader: # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not # the asset list "HA14": 3, + "HA16": 7, # There's just too many unmatched here "HA6": 117, - "HA107": 51 + "HA107": 51, } def __init__(self, directories, december_figures_filepath, use_cache): @@ -151,7 +156,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6"]: + if ha_name in ["HA1", "HA6", "HA16"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].str.lower().str.strip() @@ -173,6 +178,7 @@ class DataLoader: asset_list["Address 4"].str.lower().str.strip() + ", " + \ asset_list["Postcode"].str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() + elif ha_name == "HA39": # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -234,7 +240,7 @@ class DataLoader: :return: """ - if ha_name in ["HA6", "HA14", "HA107"]: + if ha_name in ["HA6", "HA14", "HA107", "HA16"]: split_addresses = ciga_list['Matched Address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how @@ -556,6 +562,129 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha16_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower() + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "REEDS RD", + "Reeds ROAD", + survey_list["Street / Block Name"] + ) + # Replace " rd " with "road" + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road', + regex=True) + + # Replace " , " with ", " + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace( + " , ", ', ', + ) + # Fix "{place} ,{place}" with "{place}, {place}" + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ', + regex=True) + # Strip whitespace + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip() + + # Correct errors + survey_list["Post Code"] = np.where( + survey_list["Post Code"] == "M38 0SA", + "M38 9SA", + survey_list["Post Code"] + ) + + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"), + "M44 5JF", + survey_list["Post Code"] + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road", + "chatley road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road", + "plantation avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive", + "howclough drive") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane", + "brookhurst lane") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road", + "birch road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road", + "hodson road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue", + "narbonne avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "cumberland road, cadishead", + "cumberland avenue, cadishead") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive", + "ashton field drive") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road", + "wedgwood road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close", + "hamilton avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "lichens crescent, fitton hill", + "lichens crescent") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill", + "south croft") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr", + "fir tree avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road", + "hawthorn crescent") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue", + "reins lee avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road", + "wester hill road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road", + "saint martins road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue", + "timperley close") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road", + "eastwood avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road", + "grasmere road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road", + "hulton avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue", + "beechfield road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue", + "princes avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent", + "edge fold crescent") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue", + "coniston avenue") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent", + "blackthorn crescent") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road", + "wellstock lane") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue", + "brackley street") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton", + "brook avenue, swinton") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton", + "green avenue, swinton") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley", + "grasmere avenue, wardley") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle", + "mardale avenue, wardle") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove", + "cartleach Grove") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove", + "arbor Grove") + + # Replacement for clively avenue 66-68 + survey_list["NO."] = np.where( + survey_list["NO."] == "66-68", + "66", + survey_list["NO."] + ) + + return survey_list + @staticmethod def correct_ha107_survey_list(survey_list): # Replace Front Street, East Stockham with Front Street, East Stockwith @@ -898,6 +1027,8 @@ class DataLoader: scheme_map = { "ECO4": "ECO4", "AFFORDABLE WARMTH": "ECO4", + "ECO4 A/W": "ECO4", + "ECO4 GBIS (ECO+)": "GBIS" } eco_eligibility_map = { @@ -1908,7 +2039,7 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - priority_has = ["HA1", "HA6", "HA7", "HA14", "HA39", "HA107"] + priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"] # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From 102600b19651964c4b6c7945307a8defd454f9d1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 18:40:17 +0000 Subject: [PATCH 034/248] Added HA16 --- .../ha_15_32/ha_analysis_batch_3.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a707cfa5..ee23f238 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -71,6 +71,24 @@ PROPERTY_TYPE_LOOKUP = { "Maisonette": "Maisonette", } }, + "HA16": { + 'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"}, + 'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"}, + 'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"}, + 'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"}, + 'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"}, + 'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"}, + 'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"}, + 'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Detached House': {"property-type": "House", "built-form": "Detached"}, + 'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"}, + 'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"}, + 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"}, + }, "HA39": { "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, "1st floor flat": {"property_type": "Flat", "built_form": None}, @@ -1201,6 +1219,10 @@ def get_property_type_and_built_form(property_meta, ha_name): ] built_form = None + elif ha_name == "HA16": + config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] + property_type = config.get("property-type") + built_form = config.get("built-form") elif ha_name == "HA39": property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {}) From a1c19b5b8883ead263880c2d589bd76da76d6403 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 19:01:32 +0000 Subject: [PATCH 035/248] Adding ha24 wip --- .../ha_15_32/ha_analysis_batch_3.py | 47 ++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ee23f238..94df8ceb 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -150,6 +150,10 @@ class DataLoader: "HA16": { "address": "Address", "postcode": "Postcode" + }, + "HA24": { + "address": "Address", + "postcode": "Postcode" } } @@ -174,7 +178,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA16"]: + if ha_name in ["HA1", "HA6", "HA16", "HA24"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].str.lower().str.strip() @@ -289,6 +293,8 @@ class DataLoader: return "Asset List" elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames: return "Asset" + elif "Decent Homes Stock" in workbook.sheetnames: + return "Decent Homes Stock" else: return "Assets" @@ -703,6 +709,43 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha24_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower() + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip() + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "council house, nidds lane", "nidds lane" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "wirral avenue", "wirrall avenue" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "st ives road", "st. ives crescent" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "sundringham road", "sandringham road" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "milton avenue", "milton road" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "st ives crescent", "st. ives crescent" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "council house, waterbelly lane", "waterbelly lane" + ) + # Generally remove "councile house, " from the start of the street name + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "council house, ", "" + ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "st. leodegars close", "st leodegars close" + ) + + return survey_list + @staticmethod def correct_ha107_survey_list(survey_list): # Replace Front Street, East Stockham with Front Street, East Stockwith @@ -2061,7 +2104,7 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"] + priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"] # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From e9bfd63c3588206cd9e7c79b25c6067b617bf436 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 21:00:23 +0000 Subject: [PATCH 036/248] Fixed getting property type and built form for ha107 --- .../ha_15_32/ha_analysis_batch_3.py | 77 ++++++++++++++----- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 94df8ceb..5cbfb90c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -223,12 +223,67 @@ class DataLoader: return asset_list + @staticmethod + def extract_property_info_ha107(properties): + property_types = { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Bedsit": None + } + + built_forms = { + "Detached": "Detached", + "Semi Detached": "Semi-Detached", + "End Terrace": "End-Terrace", + "Mid Terrace": "Mid-Terrace" + } + + # Function to extract property type and built form from a description + def extract_from_description(description): + property_type = None + built_form = None + + for key in property_types: + if key in description: + property_type = property_types[key] + break + + for key in built_forms: + if key in description: + built_form = built_forms[key] + break + + return property_type, built_form + + # Process each property in the list + results = [] + for property_description in properties: + property_type, built_form = extract_from_description(property_description) + results.append( + { + "Property type": property_description, + "property_type": property_type, + "built_form": built_form + } + ) + results = pd.DataFrame(results) + + return results + def append_asset_list_built_form(self, ha_name, asset_list): # Finally, we process property_type or built form, where needed if ha_name == "HA6": asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6) + if ha_name == "HA107": + mapped_df = self.extract_property_info_ha107(asset_list["Property type"].unique()) + asset_list = asset_list.merge( + mapped_df, how="left", on="Property type" + ) + return asset_list @staticmethod @@ -1280,26 +1335,8 @@ def get_property_type_and_built_form(property_meta, ha_name): property_type = "House" elif ha_name == "HA107": - dwelling_style = property_meta["Dwelling Style"] - if isinstance(dwelling_style, str): - dwelling_style = dwelling_style.strip() - - property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"]) - built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None) - - if property_type is None: - if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]: - property_type = "House" - - if "flat" in property_meta["Wall Construction"].lower(): - property_type = "Flat" - - if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0): - # Hand a few specific cases - property_type = "Bungalow" - - if property_meta["Street"] == "School View": - property_type = "Bungalow" + property_type = property_meta.get("property_type", None) + built_form = property_meta.get("built_form", None) else: raise NotImplementedError("Implement me") From 6ae21bbcb023139961eb69749ac1380a7d3ac001 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 28 Feb 2024 12:31:48 +0000 Subject: [PATCH 037/248] Creating the output structure --- etl/eligibility/Eligibility.py | 11 +- .../ha_15_32/ha_analysis_batch_3.py | 548 +++++++----------- 2 files changed, 220 insertions(+), 339 deletions(-) diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index f7a5ed98..b594579f 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -365,7 +365,7 @@ class Eligibility: return # Near perfect - if self.cavity["suitability"] and (current_sap < 55): + if self.cavity["suitability"] and (current_sap < 69): self.gbis_warmfront = { "eligible": True, "strict": True, @@ -373,15 +373,6 @@ class Eligibility: } return - # Suitable cavity, but high sap - if self.cavity["suitability"] and (current_sap >= 55): - self.gbis_warmfront = { - "eligible": True, - "strict": False, - "message": "Meets cavity, fails SAP check", - } - return - self.gbis_warmfront = { "eligible": False, "strict": False, diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 5cbfb90c..61c4a243 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1646,10 +1646,26 @@ def get_epc_data( def get_col_widths(dataframe): - # First we find the maximum length of the index column - idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))]) - # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise - return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns] + # Define a maximum width for any column to prevent excessively wide columns + max_allowed_width = 25 + + # Calculate widths for columns + widths = [] + + if isinstance(dataframe.columns, pd.MultiIndex): + # For MultiIndex, calculate max width considering the header and data + header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values] # +2 for padding + for i, column in enumerate(dataframe.columns): + max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i]) + widths.append(min(max_data_width, max_allowed_width)) + else: + # For non-MultiIndex, calculate width normally + for col in dataframe.columns: + # Calculate the max length of data or column name and limit it + max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2) # +2 for padding + widths.append(min(max_length, max_allowed_width)) + + return widths def analyse_ha_data(outputs, loader): @@ -1671,42 +1687,13 @@ def analyse_ha_data(outputs, loader): :return: """ - eco4_rate = 1710 - gbis_rate = 600 - ha_analysis_results = [] - ha_revenue_results = [] for ha_name, datasets in outputs.items(): - inputs = [x for k, x in loader.data.items() if k == ha_name][0] - # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for - # yet - # - import random - randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0]) - inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes - inputs["asset_list"]["funding_scheme"] = None - inputs["asset_list"]["funding_scheme"] = np.where( - inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)", - inputs["asset_list"]["randomly_allocated_schemes"], - inputs["asset_list"]["funding_scheme"] - ) - - # TODO: Also temp, just for HA 6 - if ha_name == "ha_6": - inputs["survey_list"]["funding_scheme"] = None - inputs["survey_list"]["funding_scheme"] = np.where( - inputs["survey_list"][ - 'AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH", - "ECO4", - "GBIS" - ) - - # End placholder results_df = datasets["results_df"].copy() - analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename( + analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename( columns={"row_meaning": "asset_identification_status"} ).merge( results_df, @@ -1715,293 +1702,236 @@ def analyse_ha_data(outputs, loader): left_on="asset_list_row_id" ) - # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is - # remaining + ################################################################################################ + # We take the properties that strictly qualified under eco + ################################################################################################ - if inputs["matched_lookup"] is not None: - analysis_data = analysis_data.merge( - inputs["matched_lookup"], how="left", on="asset_list_row_id" + eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy() + eco4_identified["identification_type"] = None + eco4_identified["identification_type"] = np.where( + (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True), + "strict", + eco4_identified["identification_type"] + ) + + eco4_identified["identification_type"] = np.where( + (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False), + "expansive", + eco4_identified["identification_type"] + ) + ################################################################################################ + # We take the properties dependent on CIGA + ################################################################################################ + + ciga_dependent_identified = analysis_data[ + analysis_data["ECO Eligibility"].isin( + [ + "eco4 (subject to ciga)", + "eco4 - passed ciga" + ] ) - # Drop any rows that have a survey_list_row_id - analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])] + ].copy() - # If we have a survey list, we merge this onto the results - n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique() - - properties_sold = ( - inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if - inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"]) - ) - properties_sold_eco4 = ( - properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if - (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0 - ) - properties_sold_gbis = ( - properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if - (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0 + # These are properties that show filled cavity + ciga_dependent_identified["identification_type"] = None + ciga_dependent_identified["identification_type"] = np.where( + ciga_dependent_identified["eco4_message"].isin( + [ + "Perfect suitability", + "Meets cavity and sap", + "Fails cavity, meets loft, fails SAP", + "Meets fabric, fails SAP check", + "Meets cavity, loft borderline, meets sap", + ] + ), + "strict", + ciga_dependent_identified["identification_type"] ) - # We now calculate the number of remaining properties, by scheme - remaining_properties = analysis_data[ - analysis_data["asset_identification_status"] == "identified potential eco works (CWI)" - ].copy() - remaining_properties["prospect_type"] = None - - remaining_properties_by_scheme = ( - remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index() + ciga_dependent_identified["identification_type"] = np.where( + (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) & + (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])), + "expansive", + ciga_dependent_identified["identification_type"] ) - n_remaining_properties_eco4 = remaining_properties_by_scheme[ - remaining_properties_by_scheme["funding_scheme"] == "ECO4" - ]["asset_list_row_id"].values[0] + ciga_dependent_identified["identification_type"] = np.where( + (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | ( + ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"]) + ), + "expansive", + ciga_dependent_identified["identification_type"] + ) - n_remaining_properties_gbis = remaining_properties_by_scheme[ - remaining_properties_by_scheme["funding_scheme"] == "GBIS" - ]["asset_list_row_id"].values[0] + ################################################################################################ + # We properties that qualified for gbis + ################################################################################################ + gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy() + gbis_identified["identification_type"] = None + gbis_identified["identification_type"] = np.where( + (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69), + "strict", + gbis_identified["identification_type"] + ) - # For the remaining properties, we use the results of the eligibility process to classify the property into - # one of multiple categories - # - # For properties that have been identified as ECO4 - # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because - # Warmfront regularly re-surveys properties which then fall within the SAP requirement - # - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties - # here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have - # very old EPCs which may score lower when re-done - # 2) Meets Fabric requirements, not SAP - # Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but - # label is separately as not a strict - # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity. - # - we don't have a SAP constraint here because the EPC is (currently) showing what the property might - # actually look like after retrofit and so the EPC currently being a C or above means little, because - # the updated EPC, showing an empty cavity, could bring the property within - # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation. - # - No SAP constraint, for the same reason as in category 2) - # 5) Looks like GBIS instead - # 6) Does not look like ECO4 candidate - # - # For properties that have been identified as GBIS - # 1) Strict GBIS candidates - # 2) Properties that actually look like strict GBIS candidates - # 3) Subject to CIGA check - Filled cavity - # 4) Does not look like a GBIS candidate + gbis_identified["identification_type"] = np.where( + (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69), + "expansive", + gbis_identified["identification_type"] + ) - remaining_eco4_df = remaining_properties[ - remaining_properties["funding_scheme"] == "ECO4" - ].copy() + # Finally, we look at the properties that have not been identified by Warmfront + not_identified = analysis_data[ + analysis_data["ECO Eligibility"].isin( + [ + "not eligible" + ] + ) + ].copy() - #################################### + surplus_eco4 = not_identified[ + (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin( + ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"] + )) + ] + + surplus_gbis = not_identified[ + (not_identified["gbis_eligible"] == True) & ( + ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values) + ) & (not_identified["sap"] < 69) & ( + (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | ( + not_identified["walls"].str.contains("partial", case=False, na=False) + ) + ) + ] + surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False] + + # Output variables # ECO4 - #################################### - - # 1) We identify this if: - # - remaining_properties["eco4_eligible"] == True - - remaining_eco4_df["prospect_type"] = np.where( - (remaining_eco4_df["eco4_eligible"] == True), - "strict ECO4", - remaining_eco4_df["prospect_type"] + n_properties_in_asset_list = inputs["asset_list"].shape[0] + n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0] + eco4_of_which_identified_strict = ( + eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] + + ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0] ) - - # 2) Meets fabric requirements - remaining_eco4_df["prospect_type"] = np.where( - ( - (remaining_eco4_df["eco4_message"] == "sap too high") & - remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) & - remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) & - pd.isnull(remaining_eco4_df["prospect_type"]) - ), - "ECO4 if SAP downgrade", - remaining_eco4_df["prospect_type"] + eco4_of_which_identified_expansive = ( + eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] + + ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0] ) - - # 3) We identify this if it has a filled cavity but meets the loft conditions - # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm) - # to account for measurement error - remaining_eco4_df["prospect_type"] = np.where( - ( - remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) & - remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) - ), - "ECO4 - Filled cavity - subject to CIGA check", - remaining_eco4_df["prospect_type"] - ) - - # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm - remaining_eco4_df["prospect_type"] = np.where( - ( - remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) & - remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"]) - ), - "ECO4 prospect - empty cavity, loft insulation below regulation", - remaining_eco4_df["prospect_type"] - ) - - # 5) Looks like GBIS instead - remaining_eco4_df["prospect_type"] = np.where( - (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]), - "Looks like GBIS", - remaining_eco4_df["prospect_type"] - ) - - # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm) - remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna( - "Does not look like ECO4 candidate" - ) - - #################################### # GBIS - #################################### - - remaining_gbis = remaining_properties[ - remaining_properties["funding_scheme"] == "GBIS" - ].copy() - - # 1) Strict GBIS candidates - remaining_gbis["prospect_type"] = np.where( - ( - (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False) - ), - "strict GBIS", - remaining_gbis["prospect_type"] - ) - - # 2) GBIS candidates that look like strict ECO4 candidates - remaining_gbis["prospect_type"] = np.where( - (remaining_gbis["eco4_eligible"] == True), - "GBIS - Upgradable to ECO4", - remaining_gbis["prospect_type"] - ) - - # 3) Subject to CIGA check - Filled cavity - remaining_gbis["prospect_type"] = np.where( - ( - remaining_gbis["eligibility_cavity_type"].isin(["full"]) & - pd.isnull(remaining_gbis["prospect_type"]) - ), - "GBIS - Filled cavity - subject to CIGA check", - remaining_gbis["prospect_type"] - ) - - # 4) Everything else - remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna( - "Does not look like GBIS candidate" - ) - - #################################### - # Surplus properties - #################################### - - # Take properties that were not identified by Warmfront and identify those that look like they would qualify - # under the strictest criteria - surplus_df = analysis_data[ - analysis_data["asset_identification_status"] != "identified potential eco works (CWI)" - ].copy() - - eco4_surplus = surplus_df[ - ( - (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") & - ( - surplus_df["eligibility_classification"].isin( - ["high confidence", "highest confidence", "medium confidence"] - ) - ) - ) - ].copy() - - gbis_surplus = surplus_df[ - ( - (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & ( - surplus_df["eligibility_cavity_type"].isin(["empty", "partial"]) - ) - ) - ].copy() - - # Perform some checks to make sure we have all of the values - remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict() - if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]): - raise ValueError( - "Number of remaining properties does not match the number of properties in remaining ECO4 dict" - ) - - remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict() - if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]): - raise ValueError( - "Number of remaining properties does not match the number of properties in remaining GBIS dict" - ) + n_warmfront_identified_gbis = gbis_identified.shape[0] + gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0] + gbis_of_which_identified_expansive = \ + gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0] to_append = { - "ha_name": ha_name, - "n_properties_in_asset_list": n_properties_in_asset_list, + ("", "HA Name"): ha_name, + ("", "# Properties in asset list"): n_properties_in_asset_list, ############ # ECO4 ############ - "properties_sold_eco4": properties_sold_eco4, - "n_remaining_properties_eco4": n_remaining_properties_eco4, - **remaining_eco4_dict, + ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4, + ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict, + ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive, + ("ECO4", "Of which identified by model - total"): ( + eco4_of_which_identified_strict + eco4_of_which_identified_expansive), + ("ECO4", "Additional properties"): surplus_eco4.shape[0], ############ # GBIS ############ - "properties_sold_gbis": properties_sold_gbis, - "n_remaining_properties_gbis": n_remaining_properties_gbis, - **remaining_gbis_dict, - ############ - # GBIS - ############ - "n_eco4_surplus": eco4_surplus.shape[0], - "n_gbis_surplus": gbis_surplus.shape[0], + ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis, + ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict, + ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive, + ("GBIS", "Of which identified by model - total"): ( + gbis_of_which_identified_strict + gbis_of_which_identified_expansive + ), + ("GBIS", "Additional properties"): surplus_gbis.shape[0] } ha_analysis_results.append(to_append) - revenue_to_append = { - "ha_name": ha_name, - "£ Remaining from asset list": ( - n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate - ), - "Of which: Strict": ( - to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate + - to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate - ), - "Of which: Subject to CIGA": ( - to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate + - to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate - ), - "Of which: Prospect, not perfect strict prospect": ( - to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate + - to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate - ), - "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate, - "Of which: Does not look like prospect": ( - to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate + - to_append.get("Does not look like GBIS candidate", 0) * gbis_rate - ), - "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate, - "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate) - } - - # Perform a quick check: - if revenue_to_append["£ Remaining from asset list"] - ( - revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] + - revenue_to_append["Of which: Prospect, not perfect strict prospect"] + - revenue_to_append["Of which: Potential downgrade to GBIS"] + - revenue_to_append["Of which: Does not look like prospect"] - ) > 1: - raise ValueError("Error between top level revenue figures and breakdown - investigate me") - - ha_revenue_results.append(revenue_to_append) - ha_analysis_results = pd.DataFrame(ha_analysis_results) - ha_revenue_results = pd.DataFrame(ha_revenue_results) + ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns) + facts_and_figures = loader.facts_and_figures.copy() + facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int) + facts_and_figures = facts_and_figures.sort_values("ha_number") + facts_and_figures = facts_and_figures.drop(columns=["ha_number"]) + + # Rename some of the cols + facts_and_figures = facts_and_figures.rename( + columns={ + # ECO4 cols + "ECO4": "ECO4 - December", + "GBIS": "GBIS - December", + "eco4 (subject to ciga)": "ECO4 - subject to ciga", + "eco4": "ECO4 - doesn't need CIGA", + "eco4 - passed ciga": "ECO4 - passed CIGA", + "failed ciga": "ECO4 - failed CIGA", + "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS", + "ECO4 - in progress": "ECO4 - Install in progress", + "ECO4 - cancelled": "ECO4 - Install cancelled", + # GBIS cols + "gbis": "GBIS total (asset list)" + } + ) + # We calculate the eco4 total from the asset list + # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is + # ECO4 - doesn't need CIGA + ECO4 - passed CIGA + # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is + # ECO4 - doesn't need CIGA + ECO4 - subject to ciga + facts_and_figures["ECO4 total (asset list)"] = np.where( + facts_and_figures["ECO4 - passed CIGA"] > 0, + facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"], + facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"] + ) + + # Re-arrange the columns + facts_and_figures = facts_and_figures[ + [ + 'HA Name', + 'ECO4 - December', + 'GBIS - December', + 'ECO4 total (asset list)', + 'GBIS total (asset list)', + 'ECO4 - subject to ciga', + "ECO4 - doesn't need CIGA", + 'ECO4 - passed CIGA', + 'ECO4 - failed CIGA', + 'ECO4 - installed', + 'ECO4 - Install in progress', + 'ECO4 - Install cancelled', + 'ECO4 - partially installed', + 'ECO4 - Install downgrade to GBIS', + ] + ] + # Addd a note to flag any rows where ECO4 ( + # subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0 + # ) + facts_and_figures["Missed CIGA checks opportunity"] = None + facts_and_figures["Missed CIGA checks opportunity"] = np.where( + (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0), + "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype( + str) + " ECO4 properties needing a CIGA check", + facts_and_figures["Missed CIGA checks opportunity"] + ) + + # Re arrage the columns + + # Also sort ha_analysis_results by ha number + ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int) + ha_analysis_results = ha_analysis_results.sort_values("ha_number") + ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"]) + + # We save 2 sheets # Automate creation of the excel # Create a Pandas Excel writer using XlsxWriter as the engine - with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer: + with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer: # Write each dataframe to a different worksheet without the index - for df, sheet in [(ha_revenue_results, 'Total Revenue'), - (ha_analysis_results, 'By ECO4 and GBIS')]: + for df, sheet in [(facts_and_figures, 'HA Facts and Figures'), + (ha_analysis_results, 'Asset Identification')]: - df.to_excel(writer, sheet_name=sheet, index=False) + df.to_excel(writer, sheet_name=sheet) # Auto-adjust columns' width for i, width in enumerate(get_col_widths(df)): @@ -2134,7 +2064,7 @@ def app(): # Determines if we want to use the cached data in s3 use_cache = True # Determines if we want to perform the data pull - pull_data = True + pull_data = False # List all of the data in the folder directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] @@ -2173,43 +2103,3 @@ def app(): floor_area_decile_thresholds=floor_area_decile_thresholds, pull_data=pull_data ) - - # for ha_name, datasets in outputs.items(): - # datasets["results_df"] = datasets["results_df"].drop( - # columns=["eligibility_cavity_type", "eligibility_loft_type"] - # ) - # - # # Re-do - # res = [] - # for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]): - # epc = { - # "walls-description": row["walls"], - # "roof-description": row["roof"], - # "floor-description": "", - # "tenure": "", - # "current-energy-efficiency": row["sap"], - # } - # eligibility = Eligibility(epc=epc, cleaned=cleaned) - # eligibility.check_eco4_warmfront() - # res.append( - # { - # "row_id": row["row_id"], - # "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"], - # "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"] - # } - # ) - # - # # Merge back on - # res = pd.DataFrame(res) - # datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id") - # - # # Re-save in s3 - # save_pickle_to_s3( - # data={ - # "results_df": datasets["results_df"], - # "scoring_df": datasets["scoring_df"], - # "nodata": datasets["nodata"] - # }, - # bucket_name="retrofit-datalake-dev", - # s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle" - # ) From 8b8e2bf902f8cc6c588eab8b64253580f3364694 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 16:29:19 +0000 Subject: [PATCH 038/248] working on new forecast approach for warmfront remaining sales --- .../ha_15_32/ha_analysis_batch_3.py | 811 +++++++++++++++++- utils/s3.py | 2 +- 2 files changed, 768 insertions(+), 45 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 61c4a243..bb27029e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -17,6 +17,7 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply from recommendations.recommendation_utils import calculate_cavity_age +from etl.epc.Record import EPCRecord EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -181,25 +182,25 @@ class DataLoader: if ha_name in ["HA1", "HA6", "HA16", "HA24"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] - ].str.lower().str.strip() + ].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list[ self.COLUMN_CONFIG[ha_name]["postcode"] - ].str.lower().str.strip() + ].astype(str).str.lower().str.strip() elif ha_name == "HA7": # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode - asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() + ", " + \ - asset_list["Address2"].str.lower().str.strip() + ", " + \ - asset_list["Address3"].str.lower().str.strip() + ", " + \ - asset_list["Postcode"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() + asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA14": # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode - asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \ - asset_list["Address 2"].str.lower().str.strip() + ", " + \ - asset_list["Address 3"].str.lower().str.strip() + ", " + \ - asset_list["Address 4"].str.lower().str.strip() + ", " + \ - asset_list["Postcode"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() + asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA39": # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code @@ -209,7 +210,7 @@ class DataLoader: asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \ asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \ asset_list["post_code"].astype(str).str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip() elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ @@ -1098,8 +1099,8 @@ class DataLoader: self.december_figures = pd.read_csv(self.december_figures_filepath) # Remove the spaces in HA Name self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "") - self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64") - self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64") + for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]: + self.december_figures[col] = self.december_figures[col].astype("Int64") if self.use_cache: self.data = read_pickle_from_s3( @@ -1203,7 +1204,6 @@ class DataLoader: # Update the asset list with the categorisations and rename changes if asset_list.shape[0] != asset_list_starting_size: raise ValueError("The asset list has changed in size") - self.data[ha_name]["asset_list"] = asset_list # Report on sales sales_report = {} @@ -1259,7 +1259,31 @@ class DataLoader: ) # We get the sales - sales_report = survey_list["installation_status"].value_counts().to_dict() + sales_report = { + "ECO4 - surveys sold": survey_list.shape[0], + **survey_list["installation_status"].value_counts().to_dict() + } + + # We find some cases where properties have sold but are missing CIGA checks + survey_list_to_merge = survey_list[["asset_list_row_id"]].copy() + survey_list_to_merge["has_a_survey_record"] = True + survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])] + + asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id") + asset_list["ECO Eligibility"] = np.where( + (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & ( + asset_list["has_a_survey_record"] == True + ), + "eco4 - passed ciga", + asset_list["ECO Eligibility"] + ) + asset_list = asset_list.drop(columns=["has_a_survey_record"]) + + # Update the survey list with installation status + self.data[ha_name]["survey_list"] = survey_list + + # Insert updated asset list + self.data[ha_name]["asset_list"] = asset_list ha_facts_and_figures.append( { @@ -1687,7 +1711,21 @@ def analyse_ha_data(outputs, loader): :return: """ + eco4_rate = 1710 + gbis_rate = 600 + old_eco4_rate = 1456 + old_gbis_rate = 432 + + epc_c_threshold = 80 + scheme_map = { + "ECO4": "ECO4", + "AFFORDABLE WARMTH": "ECO4", + "ECO4 A/W": "ECO4", + "ECO4 GBIS (ECO+)": "GBIS" + } + ha_analysis_results = [] + total_revenue_results = [] for ha_name, datasets in outputs.items(): inputs = [x for k, x in loader.data.items() if k == ha_name][0] @@ -1702,6 +1740,88 @@ def analyse_ha_data(outputs, loader): left_on="asset_list_row_id" ) + analysis_data["is_remaining"] = True + + n_sold_eco4 = 0 + n_sold_gbis = 0 + if not inputs["survey_list"].empty: + # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had + # a survey) + survey_list = inputs["survey_list"].copy() + + # TODO: TEMP + scheme_column = survey_list.columns[0] + # We clean up the survey list installation or cancelled + survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower() + # Remove all punctuation + survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( + r'[^\w\s]', '', regex=True + ) + # Remove double spaces + survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( + r'\s+', ' ', regex=True + ) + # Remove trailing spaces + survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() + + # Remap the values in the scheme column + survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map) + + survey_list["installation_status"] = None + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]), + "installed", + survey_list["installation_status"] + ) + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["cancelled"]), + "cancelled", + survey_list["installation_status"] + ) + # Find partial installations + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), + "partially installed", + survey_list["installation_status"] + ) + # Find partial cancellations + # TODO: We might have more indications of partial cancellations + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), + "partially cancelled", + survey_list["installation_status"] + ) + + # Finally, for other cases, we set the status to "in progress" + survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress") + + # We concatenate the scheme name with the installation status + survey_list["installation_status"] = ( + survey_list[scheme_column] + " - " + survey_list["installation_status"] + ) + + # TODO: END TEMP + + survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy() + survey_list_to_merge["is_remaining"] = False + analysis_data = analysis_data.drop(columns="is_remaining").merge( + survey_list_to_merge, + how="left", on="asset_list_row_id" + ) + analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True) + + n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0] + n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0] + + # Take just remaining + analysis_data = analysis_data[analysis_data["is_remaining"]] + + # Also, if the HA has started selling, we remove any that are still subject to ciga + n_eco4_missed_subject_to_ciga = 0 + if not inputs["survey_list"].empty: + n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum() + analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"] + ################################################################################################ # We take the properties that strictly qualified under eco ################################################################################################ @@ -1714,8 +1834,11 @@ def analyse_ha_data(outputs, loader): eco4_identified["identification_type"] ) + # For expansive, the property can be no higher than an EPC C eco4_identified["identification_type"] = np.where( - (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False), + (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & ( + eco4_identified["sap"] <= epc_c_threshold + ), "expansive", eco4_identified["identification_type"] ) @@ -1743,21 +1866,17 @@ def analyse_ha_data(outputs, loader): "Meets fabric, fails SAP check", "Meets cavity, loft borderline, meets sap", ] - ), + ) & (ciga_dependent_identified["sap"] <= epc_c_threshold), "strict", ciga_dependent_identified["identification_type"] ) ciga_dependent_identified["identification_type"] = np.where( - (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) & - (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])), - "expansive", - ciga_dependent_identified["identification_type"] - ) - - ciga_dependent_identified["identification_type"] = np.where( - (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | ( + ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | ( ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"]) + )) & ( + (ciga_dependent_identified["sap"] <= epc_c_threshold) & + pd.isnull(ciga_dependent_identified["identification_type"]) ), "expansive", ciga_dependent_identified["identification_type"] @@ -1775,7 +1894,9 @@ def analyse_ha_data(outputs, loader): ) gbis_identified["identification_type"] = np.where( - (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69), + (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & ( + pd.isnull(gbis_identified["identification_type"]) + ), "expansive", gbis_identified["identification_type"] ) @@ -1806,9 +1927,16 @@ def analyse_ha_data(outputs, loader): ] surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False] - # Output variables + # Output variables - the data was sent to us in December, but the remaining figures are + # what was in November + november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name] + # ECO4 - n_properties_in_asset_list = inputs["asset_list"].shape[0] + n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0] + november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0) + november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0] + eco4_sales_since_november = n_sold_eco4 - november_eco4_sold + n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0] eco4_of_which_identified_strict = ( eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] + @@ -1820,26 +1948,37 @@ def analyse_ha_data(outputs, loader): ) # GBIS n_warmfront_identified_gbis = gbis_identified.shape[0] + november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0) + november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0] + gbis_sales_since_november = n_sold_gbis - november_gbis_sold gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0] gbis_of_which_identified_expansive = \ gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0] to_append = { ("", "HA Name"): ha_name, - ("", "# Properties in asset list"): n_properties_in_asset_list, + ("", "# properties in asset list"): n_properties_remaining_in_asset_list, ############ # ECO4 ############ - ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4, + ("ECO4", "# remaining November file"): november_eco4_remaining, + ("ECO4", "# sold in November file"): november_eco4_sold, + ("ECO4", "# sold (survey list)"): n_sold_eco4, + ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga, + ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4, ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict, ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive, ("ECO4", "Of which identified by model - total"): ( - eco4_of_which_identified_strict + eco4_of_which_identified_expansive), + eco4_of_which_identified_strict + eco4_of_which_identified_expansive + ), ("ECO4", "Additional properties"): surplus_eco4.shape[0], ############ # GBIS ############ - ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis, + ("GBIS", "# remaining November file"): november_gbis_remaining, + ("GBIS", "# sold in November file"): november_gbis_sold, + ("GBIS", "# sold (survey list)"): n_sold_gbis, + ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis, ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict, ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive, ("GBIS", "Of which identified by model - total"): ( @@ -1850,6 +1989,24 @@ def analyse_ha_data(outputs, loader): ha_analysis_results.append(to_append) + # Calculate the revenue results + to_append_revenue = { + ("", "HA Name"): ha_name, + # Eco4 revenue + ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate, + ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate, + ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate, + ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate, + ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate, + ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate, + ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate, + ("ECO4", "Of which identified by model - total"): eco4_rate * ( + eco4_of_which_identified_strict + eco4_of_which_identified_expansive + ), + ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0], + } + total_revenue_results.append(to_append_revenue) + ha_analysis_results = pd.DataFrame(ha_analysis_results) ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns) @@ -1862,8 +2019,8 @@ def analyse_ha_data(outputs, loader): facts_and_figures = facts_and_figures.rename( columns={ # ECO4 cols - "ECO4": "ECO4 - December", - "GBIS": "GBIS - December", + "ECO4": "ECO4 - November", + "GBIS": "GBIS - November", "eco4 (subject to ciga)": "ECO4 - subject to ciga", "eco4": "ECO4 - doesn't need CIGA", "eco4 - passed ciga": "ECO4 - passed CIGA", @@ -1880,19 +2037,27 @@ def analyse_ha_data(outputs, loader): # ECO4 - doesn't need CIGA + ECO4 - passed CIGA # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is # ECO4 - doesn't need CIGA + ECO4 - subject to ciga - facts_and_figures["ECO4 total (asset list)"] = np.where( + facts_and_figures["ECO4 total (asset list - pre ciga)"] = ( + facts_and_figures["ECO4 - doesn't need CIGA"] + + facts_and_figures["ECO4 - subject to ciga"] + + facts_and_figures["ECO4 - passed CIGA"] + ) + + facts_and_figures["ECO4 total (asset list - post ciga)"] = None + facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where( facts_and_figures["ECO4 - passed CIGA"] > 0, facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"], - facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"] + facts_and_figures["ECO4 total (asset list - post ciga)"] ) # Re-arrange the columns facts_and_figures = facts_and_figures[ [ 'HA Name', - 'ECO4 - December', - 'GBIS - December', - 'ECO4 total (asset list)', + 'ECO4 - November', + 'GBIS - November', + 'ECO4 total (asset list - pre ciga)', + 'ECO4 total (asset list - post ciga)', 'GBIS total (asset list)', 'ECO4 - subject to ciga', "ECO4 - doesn't need CIGA", @@ -1916,6 +2081,8 @@ def analyse_ha_data(outputs, loader): facts_and_figures["Missed CIGA checks opportunity"] ) + facts_and_figures.to_csv("Facts and figures sample.csv") + # Re arrage the columns # Also sort ha_analysis_results by ha number @@ -1937,6 +2104,333 @@ def analyse_ha_data(outputs, loader): for i, width in enumerate(get_col_widths(df)): writer.sheets[sheet].set_column(i, i, width) + # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their + # description, and what proportion of time they get identified via non-invasive surveys + + # true_eco4_assets = [] + # ciga_dependent_assets = [] + # not_eligible = [] + # as_built_insulated = [] + # date_cols = { + # "HA39": "date_built", + # "HA14": "Built In Year", + # "HA6": "Construction Year", + # "HA1": "Build Date", + # "HA107": "YEAR BUILT" + # } + # for ha_name, data_objects in outputs.items(): + # inputs = [x for k, x in loader.data.items() if k == ha_name][0] + # + # date_col = date_cols[ha_name] + # results_df = data_objects["results_df"].copy() + # df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename( + # columns={"row_meaning": "asset_identification_status", date_col: "date_built"} + # ).merge( + # results_df, + # how="left", + # right_on="row_id", + # left_on="asset_list_row_id" + # ) + # + # # take the true ECO4 + # true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy() + # ciga_dependent = df[ + # df["ECO Eligibility"].isin( + # [ + # "eco4 (subject to ciga)", + # "failed ciga", + # "eco4 - passed ciga" + # ] + # ) + # ] + # insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy() + # # We convert date built to datetime + # try: + # insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])] + # insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year + # as_built_insulated.append(insulated_assumed) + # except Exception as e: + # print("oh well") + # + # true_eco4_assets.append(true_eco4) + # ciga_dependent_assets.append(ciga_dependent) + # + # true_eco4_assets = pd.concat(true_eco4_assets) + # ciga_dependent_assets = pd.concat(ciga_dependent_assets) + # as_built_insulated = pd.concat(as_built_insulated) + # + # true_eco4_assets["walls"].value_counts(normalize=True) + # ciga_dependent_assets["walls"].value_counts(normalize=True) + # + # from recommendations.recommendation_utils import extract_insulation_thickness + # + # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply( + # lambda x: extract_insulation_thickness(x) + # ) + # + # true_eco4_assets["e"] = true_eco4_assets.merge( + # pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]], + # how="left", + # left_on="roof", + # right_on="original_description" + # ) + # + # true_eco4_assets["sap"].mean() + # + # true_eco4_assets["insulation_thickness"].isin( + # ["250", "150", "200", "100", "75", "50"] + # ).sum() / true_eco4_assets.shape[0] + # + # true_eco4_assets["insulation_thickness"].isin( + # ["100"] + # ).sum() / true_eco4_assets.shape[0] + # + # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True) + + +def get_propensity_model_data( + loader, cleaned, cleaning_data, created_at, photo_supply_lookup, + floor_area_decile_thresholds, pull_data=True +): + # TODO: Set a seed! + model_data = [] + for ha_name, data_assets in loader.data.items(): + + logger.info("Processing HA: %s", ha_name) + if data_assets["survey_list"].empty: + continue + + number_sold = data_assets["survey_list"].shape[0] + + # For each HA, we read pull in the data required, and store in S3 + asset_list = data_assets["asset_list"].copy() + # We determine the number of properties that we should select that are eligible + asset_list_size = asset_list.shape[0] + # Number eligible + n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0] + success_rate = n_eligibile / asset_list_size + needed_sample_size = np.ceil(number_sold / success_rate) + number_negative_samples = int(needed_sample_size - number_sold) + + sold_asset_list_ids = data_assets["survey_list"]["asset_list_row_id"].tolist() + negative_sample_asset_list_ids = asset_list["asset_list_row_id"].sample(number_negative_samples).tolist() + sample_ids = sold_asset_list_ids + negative_sample_asset_list_ids + + sample_asset_list = asset_list[asset_list["asset_list_row_id"].isin(sample_ids)] + + # In order to have the most confidence, we should take just properties that have 1 EPC. We might need to + # cut down the number of properties that we include because of this + # Note: This is an imbalanced problem so we will need to build a model accomadating of that + + data = [] + errors = [] + for index, property_meta in tqdm(sample_asset_list.iterrows(), total=len(sample_asset_list)): + + if property_meta["matching_postcode"] is None: + continue + + property_type, built_form = get_property_type_and_built_form( + property_meta=property_meta, ha_name=ha_name + ) + + searcher = SearchEpc( + address1=str(property_meta["HouseNo"]), + postcode=property_meta["matching_postcode"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + full_address=property_meta["matching_address"] + ) + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + continue + + if searcher.newest_epc.get("estimated"): + # We insert the row ID as our proxy for UPRN + searcher.newest_epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1]) + + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc + + # If we have more than 1 EPC for the moment we just continue + if older_epcs or full_sap_epc: + continue + try: + + # We clean up the data + epc_records = { + 'original_epc': newest_epc.copy(), + 'full_sap_epc': full_sap_epc.copy(), + 'old_data': older_epcs.copy(), + } + + epc_record = EPCRecord( + epc_records=epc_records, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + # If we have some data, continue + data.append( + { + "ECO Eligibility": property_meta["ECO Eligibility"], + "asset_list_row_id": property_meta["asset_list_row_id"], + **epc_record.get("prepared_epc") + } + ) + except Exception as e: + errors.append( + { + "error": str(e), + "asset_list_row_id": property_meta["asset_list_row_id"], + "matching_postcode": property_meta["matching_postcode"], + "matching_address": property_meta["matching_address"] + } + ) + + data = pd.DataFrame(data) + # We store the results in S3 as a pickle + save_pickle_to_s3( + data=data, + bucket_name="retrofit-datalake-dev", + s3_file_name=f"propensity_model_data/{ha_name}/train.pickle" + ) + + # Store the errors + if errors: + save_pickle_to_s3( + data=errors, + bucket_name="retrofit-datalake-dev", + s3_file_name=f"propensity_model_data/{ha_name}/errors.pickle" + ) + + model_data.append(data) + + return model_data + + +def conversion_model(loader): + # Read in the model data + + model_data = [] + for ha_name in loader.data.keys(): + try: + picked = read_pickle_from_s3( + bucket_name="retrofit-datalake-dev", + s3_file_name=f"propensity_model_data/{ha_name}/train.pickle" + ) + data = pd.DataFrame(picked) + + # We merge on the sales data + sales_data = loader.data[ha_name]["survey_list"].copy() + data = data.merge( + sales_data[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + data["ha_name"] = ha_name + + except Exception as e: + logger.error("Error reading in the data for %s", ha_name) + continue + + model_data.append(data) + + model_data = pd.concat(model_data) + + model_data["response"] = model_data["installation_status"].isin( + [ + "ECO4 - in progress", + "ECO4 - installed" + ] + ).astype(int) + + # Because of how we pulled the data, we need to re-balance the sample + ha_names = model_data["ha_name"].unique() + + balanced_sample = [] + for ha_name in ha_names: + df = model_data[model_data["ha_name"] == ha_name] + positive_samples = df[df["response"] == 1] + negative_samples = df[df["response"] != 1] + + inputs = [x for k, x in loader.data.items() if k == ha_name][0] + asset_list = inputs["asset_list"].copy() + asset_list_size = asset_list.shape[0] + n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0] + success_rate = n_eligibile / asset_list_size + needed_sample_size = np.ceil(positive_samples.shape[0] / success_rate) + number_negative_samples = int(needed_sample_size - positive_samples.shape[0]) + negative_samples_subset = negative_samples.sample(number_negative_samples) + + output = pd.concat([positive_samples, negative_samples_subset]) + + balanced_sample.append(output) + + balanced_sample = pd.concat(balanced_sample) + + # We work with a small sample + # Drop the ECO Eligibility column and installation_status column + # We keep the ID column + balanced_sample = balanced_sample.drop( + columns=['ECO Eligibility', 'asset_list_row_id', 'address', 'uprn_source', 'address3', 'local_authority_label', + 'county', 'postcode', 'constituency', 'local_authority', 'inspection_date', 'address1', + 'constituency_label', 'building_reference_number', 'address2', 'posttown', 'lodgement_datetime', + 'uprn', 'lodgement_date', 'lmk_key', 'installation_status', 'ha_name'] + ) + + # POC model + df = balanced_sample.copy() + # FIll missings with means, if they exist + numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns + df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean()) + + categorical_cols = df.select_dtypes(include=['object', 'category']).columns + df[categorical_cols] = df[categorical_cols].fillna("other") + + # Reduce the number of categories to a specific number and the rest to other + max_n_categories = 10 + for col in categorical_cols: + top_categories = df[col].value_counts().nlargest(max_n_categories).index + df[col] = df[col].where(df[col].isin(top_categories), other="other") + + # Use a model based approach to feature selection + import xgboost as xgb + from sklearn.model_selection import train_test_split + + # Assuming your outcome column is named 'target' + X = df.drop(columns=['response']) + y = df['response'] + df["low_energy_fixed_light_count"].va + + # Encoding categorical variables if not already done + X = pd.get_dummies(X, drop_first=True) + + # Splitting the data into train and test sets + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Initialize an XGBoost classifier + model = xgb.XGBClassifier() + + # Fit the model + model.fit(X_train, y_train) + + # Get feature importances + feature_importances = model.feature_importances_ + + # Map feature importances to their corresponding column names + feature_importance_dict = {feature: importance for feature, importance in zip(X.columns, feature_importances)} + + # Sort features by importance + sorted_features = sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True) + + # Display sorted features + for feature, importance in sorted_features: + print(f"{feature}: {importance}") + def patch_cleaned(cleaned): # Patch to handle the a missing description @@ -2054,6 +2548,218 @@ def patch_cleaned(cleaned): return cleaned +def forecast_remaining_sales(loader): + # Assumptions: + # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate + # and I don't want the numbers to change too much, depenent on the CIGA conversation rate + maximum_ciga_conversion = 0.75 + + gbis_rate = 600 + eco4_rate = 1710 + old_gbis_rate = 432 + old_eco4_rate = 1456 + + # 1) Calculate the conversion rate from passed CIGA to actual sale + converted_ciga_jobs = [] + for ha_name, input_data in loader.data.items(): + asset_list = input_data["asset_list"].copy() + survey_list = input_data["survey_list"].copy() + + if survey_list.empty: + continue + + ciga_dependent_assets = asset_list[ + asset_list["ECO Eligibility"] == "eco4 - passed ciga" + ] + + # These are now the ciga dependent assets at installation + ciga_dependent_assets_at_installation = ciga_dependent_assets.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="inner", + on="asset_list_row_id" + ) + + # We then calculate how many get cancelled + ciga_dependent_assets_sold = ciga_dependent_assets_at_installation[ + ciga_dependent_assets_at_installation["installation_status"].isin( + [ + "ECO4 - installed", "ECO4 - in progress" + ] + ) + ] + + ciga_dependent_assets_failed = ciga_dependent_assets_at_installation[ + ~ciga_dependent_assets_at_installation["installation_status"].isin( + [ + "ECO4 - installed", "ECO4 - in progress" + ] + ) + ] + + converted_ciga_jobs.append( + { + "HA Name": ha_name, + "# Ciga dependent at installation": ciga_dependent_assets_at_installation.shape[0], + "# Ciga dependent successfully installed": ciga_dependent_assets_sold.shape[0], + "# Ciga dependent failed install": ciga_dependent_assets_failed.shape[0] + } + ) + + converted_ciga_jobs = pd.DataFrame(converted_ciga_jobs) + + # We calculate a ciga pass to install conversaion rate + median_ciga_pass_to_install = ( + converted_ciga_jobs["# Ciga dependent successfully installed"].sum() / + converted_ciga_jobs["# Ciga dependent at installation"].sum() + ) + + # 2) Calculate the conversion rate from CIGA dependent ciga passed + ciga_passrates = [] + for ha_name, input_data in loader.data.items(): + + # If we don't have a ciga list, we can't do anything + if input_data["ciga_list"].empty: + continue + + # 1) Calculate the conversion rate for CIGA to actual sale + asset_list = input_data["asset_list"].copy() + + ciga_completed_assets = asset_list[ + asset_list["ECO Eligibility"].isin( + [ + "eco4 - passed ciga", + "failed ciga" + ] + ) + ] + + ciga_passed = ciga_completed_assets[ + ciga_completed_assets["ECO Eligibility"].isin( + [ + "eco4 - passed ciga" + ] + ) + ] + + ciga_passrates.append( + { + "Ha Name": ha_name, + "# CIGA dependent": ciga_completed_assets.shape[0], + "# CIGA passed": ciga_passed.shape[0], + } + ) + + ciga_passrates = pd.DataFrame(ciga_passrates) + + median_ciga_pass_to_install = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum() + + # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install + eco4_ciga_independent_passrates = [] + gbis_ciga_independent_passrates = [] + for ha_name, input_data in loader.data.items(): + asset_list = input_data["asset_list"].copy() + survey_list = input_data["survey_list"].copy() + + if survey_list.empty: + continue + + # For properties that were identified as a typical ECO4 job, we calculate the number of properties that + # installed + # vs cancelled + + typical_eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"] + typical_gbis = asset_list[asset_list["ECO Eligibility"] == "gbis"] + + # Merge on the surveys + typical_eco4_installed = typical_eco4.merge( + survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id" + ) + + if not typical_eco4_installed.empty: + typical_eco4_sold = typical_eco4_installed[ + typical_eco4_installed["installation_status"].isin( + [ + "ECO4 - installed", "ECO4 - in progress" + ] + ) + ] + + eco4_ciga_independent_passrates.append( + { + "Ha Name": ha_name, + "# ECO4 at install stage": typical_eco4_installed.shape[0], + "# ECO4 successfully installed": typical_eco4_sold.shape[0] + } + ) + + typical_gbis_installed = typical_gbis.merge( + survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id" + ) + if not typical_gbis_installed.empty: + typical_gbis_sold = typical_gbis_installed[ + typical_gbis_installed["installation_status"].isin( + [ + "GBIS - in progress", "GBIS - installed" + ] + ) + ] + + gbis_ciga_independent_passrates.append( + { + "Ha Name": ha_name, + "# GBIS at install stage": typical_gbis_installed.shape[0], + "# GBIS successfully installed": typical_gbis_sold.shape[0] + } + ) + + eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates) + gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates) + + median_eco4_to_install = ( + eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() / + eco4_ciga_independent_passrates["# ECO4 at install stage"].sum() + ) + + median_gbis_to_install = ( + gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() / + gbis_ciga_independent_passrates["# GBIS at install stage"].sum() + ) + + # Produce the final output + december_figures = loader.december_figures.copy() + december_figures = december_figures.fillna(0) + results = [] + for ha_name, input_data in loader.data.items(): + # Original warmfront figures + original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] + + original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0] + original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0] + original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0] + original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0] + + original_warmfront_eco4_revenue = ( + original_warmfront_remaining_eco4 * eco4_rate + + (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate + ) + original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate + + original_warmfront_gbis_revenue = ( + original_warmfront_remaining_gbis * gbis_rate + + (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate + ) + + results.append( + { + ("", "", "HA Name"): ha_name, + ("Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, + ("", "Remaining - #", ""): original_warmfront_remaining_eco4, + ("", "Total - £", ""): original_warmfront_eco4_revenue, + ("", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, + } + ) + + def app(): """ This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. @@ -2067,11 +2773,14 @@ def app(): pull_data = False # List all of the data in the folder - directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] + + directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir() + for file in entry.iterdir() if file.suffix == '.xlsx'] # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"] + # priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"] + priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"] # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] @@ -2103,3 +2812,17 @@ def app(): floor_area_decile_thresholds=floor_area_decile_thresholds, pull_data=pull_data ) + + analyse_ha_data(outputs, loader) + + # import pickle + # with open("ha_analysis.pickle", "wb") as f: + # pickle.dump({"outputs": outputs, "loader": loader}, f) + + # To read: + # import pickle + # with open("ha_analysis.pickle", "rb") as f: + # outputs = pickle.load(f)["outputs"] + # + # with open("loader.pickle", "rb") as f: + # loader = pickle.load(f) diff --git a/utils/s3.py b/utils/s3.py index cb55094a..8d36bdb3 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -184,7 +184,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name): logger.errpr("Incomplete credentials provided.") return None except Exception as e: - logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}') + logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}') return None # Deserialize data from pickle format From 9e679bd3fdb6e38a263f804ffdb07dda3892e7b1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 16:59:22 +0000 Subject: [PATCH 039/248] working on new forecast methodology --- .../ha_15_32/ha_analysis_batch_3.py | 81 +++++++++++++++++-- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bb27029e..21af73ff 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2728,15 +2728,22 @@ def forecast_remaining_sales(loader): # Produce the final output december_figures = loader.december_figures.copy() december_figures = december_figures.fillna(0) + # If we have negative remaining, it means that actually sold more gbis than they initially thought so we set + # remaining to 0 + december_figures["ECO4 remaining"] = np.where( + december_figures["ECO4 remaining"] < 0, 0, december_figures["ECO4 remaining"] + ) + december_figures["GBIS remaining"] = np.where( + december_figures["GBIS remaining"] < 0, 0, december_figures["GBIS remaining"] + ) + results = [] for ha_name, input_data in loader.data.items(): - # Original warmfront figures + # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0] original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0] - original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0] - original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0] original_warmfront_eco4_revenue = ( original_warmfront_remaining_eco4 * eco4_rate + @@ -2744,21 +2751,79 @@ def forecast_remaining_sales(loader): ) original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate + # Original warmfront figures - GBIS + + original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0] + original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0] + original_warmfront_gbis_revenue = ( original_warmfront_remaining_gbis * gbis_rate + (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate ) + original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate + + # Asset list + asset_list = input_data["asset_list"].copy() + survey_list = input_data["survey_list"].copy() + + asset_list_remaining = asset_list.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] + + eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index() + eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index() + + eco4_pre_ciga = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"].isin( + ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + ) + ]["count"].sum() + + eco4_pre_ciga_remaining = eligiblity_counts_remaining[ + eligiblity_counts["ECO Eligibility"].isin( + ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + ) + ]["count"].sum() + + eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate + eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate + + # We check if the property has done a CIGA check + has_ciga_check = not input_data["ciga_list"].empty + + if has_ciga_check: + eco4_post_ciga = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"].isin( + ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + ) + ]["count"].sum() results.append( { - ("", "", "HA Name"): ha_name, - ("Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, - ("", "Remaining - #", ""): original_warmfront_remaining_eco4, - ("", "Total - £", ""): original_warmfront_eco4_revenue, - ("", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, + ("", "", "", "HA Name"): ha_name, + # ECO4 - original warmfront figures + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, + ("ECO4", "", "Remaining - #", ""): original_warmfront_remaining_eco4, + ("ECO4", "", "Total - £", ""): original_warmfront_eco4_revenue, + ("ECO4", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, + # GBIS - original warmfront figures + ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis, + ("GBIS", "", "Remaining - #", ""): original_warmfront_gbis, + ("GBIS", "", "Total - £", ""): original_warmfront_gbis_revenue, + ("GBIS", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue, + # ECO4 - asset list + ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, + ("ECO4", "", "Remaining - #", ""): eco4_pre_ciga_remaining, + ("ECO4", "", "Total - £", ""): eco4_pre_ciga_revenue, + ("ECO4", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, } ) + results = pd.DataFrame(results) + def app(): """ From a81f1f2520479e706479bada1761aaa92bb01a44 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 17:37:57 +0000 Subject: [PATCH 040/248] Adding in eligible properties left estimation --- .../ha_15_32/ha_analysis_batch_3.py | 101 ++++++++++++------ 1 file changed, 69 insertions(+), 32 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 21af73ff..cf9dfa53 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2613,7 +2613,7 @@ def forecast_remaining_sales(loader): converted_ciga_jobs["# Ciga dependent at installation"].sum() ) - # 2) Calculate the conversion rate from CIGA dependent ciga passed + # 2) Calculate the conversion rate from CIGA dependent to ciga passed ciga_passrates = [] for ha_name, input_data in loader.data.items(): @@ -2651,7 +2651,7 @@ def forecast_remaining_sales(loader): ciga_passrates = pd.DataFrame(ciga_passrates) - median_ciga_pass_to_install = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum() + median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum() # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install eco4_ciga_independent_passrates = [] @@ -2762,16 +2762,20 @@ def forecast_remaining_sales(loader): ) original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate - # Asset list + # Asset list - ECO4 asset_list = input_data["asset_list"].copy() survey_list = input_data["survey_list"].copy() - asset_list_remaining = asset_list.merge( - survey_list[["asset_list_row_id", "installation_status"]], - how="left", - on="asset_list_row_id" - ) - asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] + if survey_list.empty: + asset_list_remaining = asset_list.copy() + else: + asset_list_remaining = asset_list.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] + asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index() eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index() @@ -2791,36 +2795,69 @@ def forecast_remaining_sales(loader): eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate - # We check if the property has done a CIGA check - has_ciga_check = not input_data["ciga_list"].empty + # Total Eligible - this is what passed ciga checks + strict. If we don't have what passed CIGA, we estimate + # We check if the HA has done a CIGA check. Also, if we have assets dormant at CIGA, we estimate what will + # convert + # We estimate a conversion for anything left post CIGA + ha_ciga_conversion = ciga_passrates[ciga_passrates["Ha Name"] == ha_name] + if not ha_ciga_conversion.empty: + ha_ciga_conversion_rate = ( + ha_ciga_conversion["# CIGA passed"].values[0] / ha_ciga_conversion["# CIGA dependent"].values[0] + ) + else: + ha_ciga_conversion_rate = ( + median_ciga_success_rate if median_ciga_success_rate <= median_ciga_success_rate else + median_ciga_success_rate + ) + remaining_needing_ciga_check = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)" + ]["count"].sum() + + has_ciga_check = not input_data["ciga_list"].empty if has_ciga_check: eco4_post_ciga = eligiblity_counts[ eligiblity_counts["ECO Eligibility"].isin( - ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + ["eco4", "eco4 - passed ciga", "failed ciga"] ) ]["count"].sum() - results.append( - { - ("", "", "", "HA Name"): ha_name, - # ECO4 - original warmfront figures - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, - ("ECO4", "", "Remaining - #", ""): original_warmfront_remaining_eco4, - ("ECO4", "", "Total - £", ""): original_warmfront_eco4_revenue, - ("ECO4", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, - # GBIS - original warmfront figures - ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis, - ("GBIS", "", "Remaining - #", ""): original_warmfront_gbis, - ("GBIS", "", "Total - £", ""): original_warmfront_gbis_revenue, - ("GBIS", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue, - # ECO4 - asset list - ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, - ("ECO4", "", "Remaining - #", ""): eco4_pre_ciga_remaining, - ("ECO4", "", "Total - £", ""): eco4_pre_ciga_revenue, - ("ECO4", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, - } - ) + if remaining_needing_ciga_check > 0: + # We update the eco4 post ciga with the converted remaining + eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + else: + eco4_post_ciga = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4" + ]["count"].sum() + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + + eco4_post_ciga = int(eco4_post_ciga) + + to_append = { + ("", "", "", "HA Name"): ha_name, + # ECO4 - original warmfront figures + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, + ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4, + ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue, + ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, + # GBIS - original warmfront figures + ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis, + ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis, + ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue, + ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue, + # ECO4 - asset list, pre-ciga + ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, + ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, + ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, + ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, + # ECO4 - asset list, post ciga + ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga, + } + + # Make sure nothing is forgotten due to duplicate multi-index keys + if len(to_append) != 14: + raise ValueError("Something went wrong") + + results.append(to_append) results = pd.DataFrame(results) From 6544adc6c3c9d811f789a0372a33a19bd32beb78 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 17:47:51 +0000 Subject: [PATCH 041/248] Added eligibility calculations --- .../ha_15_32/ha_analysis_batch_3.py | 55 ++++++++++++------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index cf9dfa53..8a46703e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2548,6 +2548,33 @@ def patch_cleaned(cleaned): return cleaned +def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate): + remaining_needing_ciga_check = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)" + ]["count"].sum() + + has_ciga_check = not input_data["ciga_list"].empty + if has_ciga_check: + eco4_post_ciga = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"].isin( + ["eco4", "eco4 - passed ciga", "failed ciga"] + ) + ]["count"].sum() + + if remaining_needing_ciga_check > 0: + # We update the eco4 post ciga with the converted remaining + eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + else: + eco4_post_ciga = ( + eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + ) + eco4_post_ciga = int(eco4_post_ciga) + eco4_post_ciga_revenue = eco4_post_ciga * eco4_rate + + return eco4_post_ciga, eco4_post_ciga_revenue + + def forecast_remaining_sales(loader): # Assumptions: # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate @@ -2810,27 +2837,13 @@ def forecast_remaining_sales(loader): median_ciga_success_rate ) - remaining_needing_ciga_check = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)" - ]["count"].sum() + eco4_post_ciga, eco4_post_ciga_revenue = calculate_eco4_post_ciga( + eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate + ) - has_ciga_check = not input_data["ciga_list"].empty - if has_ciga_check: - eco4_post_ciga = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"].isin( - ["eco4", "eco4 - passed ciga", "failed ciga"] - ) - ]["count"].sum() - - if remaining_needing_ciga_check > 0: - # We update the eco4 post ciga with the converted remaining - eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) - else: - eco4_post_ciga = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4" - ]["count"].sum() + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) - - eco4_post_ciga = int(eco4_post_ciga) + eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga( + eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate + ) to_append = { ("", "", "", "HA Name"): ha_name, @@ -2851,6 +2864,8 @@ def forecast_remaining_sales(loader): ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, # ECO4 - asset list, post ciga ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga, + ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining, + ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue, } # Make sure nothing is forgotten due to duplicate multi-index keys From 5c686f5ec471b3c5c84b307e0851e2a0462934c0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 17:56:45 +0000 Subject: [PATCH 042/248] working on forecast --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 8a46703e..0bf34e70 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2841,6 +2841,9 @@ def forecast_remaining_sales(loader): eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate ) + # Calculate the delta compared to Warmfront's original estimate + eco4_delta_vs_original_estimate = 200 * (eco4_post_ciga - original_warmfront_eco4) / original_warmfront_eco4 + eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga( eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate ) @@ -2862,14 +2865,17 @@ def forecast_remaining_sales(loader): ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, - # ECO4 - asset list, post ciga + # ECO4 - asset list, post ciga, total ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga, - ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining, ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue, + ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate, + # ECO4 - asset list, post ciga, remaining + ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining, + ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_revenue, } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 14: + if len(to_append) != 18: raise ValueError("Something went wrong") results.append(to_append) From c47af474b92282a1159c2866e8810e8e883db7bd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 18:13:11 +0000 Subject: [PATCH 043/248] Added in remaining breakdowns into forecast and confirmed --- .../ha_15_32/ha_analysis_batch_3.py | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 0bf34e70..77c18e80 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2555,24 +2555,40 @@ def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_r has_ciga_check = not input_data["ciga_list"].empty if has_ciga_check: - eco4_post_ciga = eligiblity_counts[ + eco4_confirmed = eligiblity_counts[ eligiblity_counts["ECO Eligibility"].isin( - ["eco4", "eco4 - passed ciga", "failed ciga"] + ["eco4", "eco4 - passed ciga"] ) ]["count"].sum() if remaining_needing_ciga_check > 0: # We update the eco4 post ciga with the converted remaining - eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast + else: + eco4_remaining_forecast = 0 + eco4_post_ciga = eco4_confirmed else: + eco4_confirmed = 0 + eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) eco4_post_ciga = ( - eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + - np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast ) eco4_post_ciga = int(eco4_post_ciga) - eco4_post_ciga_revenue = eco4_post_ciga * eco4_rate + eco4_remaining_forecast = int(eco4_remaining_forecast) - return eco4_post_ciga, eco4_post_ciga_revenue + results = { + # Counts + "ECO4 - post CIGA - #": eco4_post_ciga, + "Of which confirmed - #": eco4_confirmed, + "Of which forecast - #": eco4_remaining_forecast, + # Revenue + "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate, + "Of which confirmed - £": eco4_confirmed * eco4_rate, + "Of which forecast - £": eco4_remaining_forecast * eco4_rate, + } + + return results def forecast_remaining_sales(loader): @@ -2837,14 +2853,16 @@ def forecast_remaining_sales(loader): median_ciga_success_rate ) - eco4_post_ciga, eco4_post_ciga_revenue = calculate_eco4_post_ciga( + eco4_post_ciga_total_results = calculate_eco4_post_ciga( eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate ) # Calculate the delta compared to Warmfront's original estimate - eco4_delta_vs_original_estimate = 200 * (eco4_post_ciga - original_warmfront_eco4) / original_warmfront_eco4 + eco4_delta_vs_original_estimate = 100 * ( + eco4_post_ciga_total_results["ECO4 - post CIGA - #"] - original_warmfront_eco4 + ) / original_warmfront_eco4 - eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga( + eco4_post_ciga_remaining_results = calculate_eco4_post_ciga( eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate ) @@ -2866,12 +2884,25 @@ def forecast_remaining_sales(loader): ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, # ECO4 - asset list, post ciga, total - ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga, - ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue, + ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga_total_results[ + "ECO4 - post CIGA - #"], + ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[ + "ECO4 - post CIGA - £"], ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate, # ECO4 - asset list, post ciga, remaining - ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining, - ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_revenue, + ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[ + "ECO4 - post CIGA - #"], + ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_results[ + "ECO4 - post CIGA - £"], + ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""): + eco4_post_ciga_remaining_results["Of which confirmed - #"], + ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""): + eco4_post_ciga_remaining_results["Of which confirmed - £"], + ("ECO4 post-ciga", "", "Of which forecast - #", ""): + eco4_post_ciga_remaining_results["Of which forecast - #"], + ("ECO4 post-ciga", "", "Of which forecast - £", ""): + eco4_post_ciga_remaining_results["Of which forecast - £"], + # CIGA failures } # Make sure nothing is forgotten due to duplicate multi-index keys From 752f0b0f8384a1082161abf31c18638864c45f1e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 18:37:47 +0000 Subject: [PATCH 044/248] splitting out post ciga figures --- .../ha_15_32/ha_analysis_batch_3.py | 71 +++++++++++++++---- 1 file changed, 59 insertions(+), 12 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 77c18e80..4f33bf34 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2548,34 +2548,52 @@ def patch_cleaned(cleaned): return cleaned -def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate): +def calculate_eco4_post_ciga( + eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate, + eco4_rate +): remaining_needing_ciga_check = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)" ]["count"].sum() has_ciga_check = not input_data["ciga_list"].empty if has_ciga_check: - eco4_confirmed = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"].isin( - ["eco4", "eco4 - passed ciga"] - ) - ]["count"].sum() + + eco4_no_ciga_needed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4" + ]["count"].sum() + + eco4_ciga_passed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga" + ]["count"].sum() + + eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) + eco4_confirmed = np.round(eco4_confirmed) if remaining_needing_ciga_check > 0: # We update the eco4 post ciga with the converted remaining - eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eco4_remaining_forecast = np.round( + remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate + ) eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast else: eco4_remaining_forecast = 0 eco4_post_ciga = eco4_confirmed else: - eco4_confirmed = 0 - eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eco4_no_ciga_needed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4" + ]["count"].sum() + eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate) + eco4_remaining_forecast = np.round( + remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate + ) eco4_post_ciga = ( eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast ) + eco4_post_ciga = int(eco4_post_ciga) eco4_remaining_forecast = int(eco4_remaining_forecast) + eco4_confirmed = int(eco4_confirmed) results = { # Counts @@ -2853,8 +2871,32 @@ def forecast_remaining_sales(loader): median_ciga_success_rate ) + # We also need the ha ciga passed to install success rate + ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name] + if not ha_ciga_pass_to_sale.empty: + ha_ciga_pass_to_sale_rate = ( + ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] / + ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] + ) + else: + ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install + + ha_eco4_to_sale = eco4_ciga_independent_passrates[eco4_ciga_independent_passrates["Ha Name"] == ha_name] + if not ha_eco4_to_sale.empty: + ha_eco4_to_sale_rate = ( + ha_eco4_to_sale['# ECO4 successfully installed'].values[0] / + ha_eco4_to_sale['# ECO4 at install stage'].values[0] + ) + else: + ha_eco4_to_sale_rate = median_eco4_to_install + eco4_post_ciga_total_results = calculate_eco4_post_ciga( - eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate + eligiblity_counts=eligiblity_counts, + input_data=input_data, + ha_ciga_conversion_rate=ha_ciga_conversion_rate, + ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate, + ha_eco4_to_sale_rate=ha_eco4_to_sale_rate, + eco4_rate=eco4_rate ) # Calculate the delta compared to Warmfront's original estimate @@ -2863,7 +2905,12 @@ def forecast_remaining_sales(loader): ) / original_warmfront_eco4 eco4_post_ciga_remaining_results = calculate_eco4_post_ciga( - eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate + eligiblity_counts=eligiblity_counts_remaining, + input_data=input_data, + ha_ciga_conversion_rate=ha_ciga_conversion_rate, + ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate, + ha_eco4_to_sale_rate=ha_eco4_to_sale_rate, + eco4_rate=eco4_rate ) to_append = { @@ -2906,7 +2953,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 18: + if len(to_append) != 22: raise ValueError("Something went wrong") results.append(to_append) From 56ee7224f58e7363a1732ed46aaebd29a71f7acd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 19:53:28 +0000 Subject: [PATCH 045/248] Added gbis remaining columns --- .../ha_15_32/ha_analysis_batch_3.py | 1100 +++++++++-------- 1 file changed, 592 insertions(+), 508 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 4f33bf34..191ca74c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1692,500 +1692,500 @@ def get_col_widths(dataframe): return widths -def analyse_ha_data(outputs, loader): - """ - The approach we take within this function is the following: - For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The - characterisation can be broken down as the following: - 1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria - 2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to - a CIGA check - 3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft - insulation - 4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under - any cirsumstances, given the available data - - Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would - qualify under the strictest criteria, and mark these as potential additional opportunities. - - :return: - """ - - eco4_rate = 1710 - gbis_rate = 600 - old_eco4_rate = 1456 - old_gbis_rate = 432 - - epc_c_threshold = 80 - scheme_map = { - "ECO4": "ECO4", - "AFFORDABLE WARMTH": "ECO4", - "ECO4 A/W": "ECO4", - "ECO4 GBIS (ECO+)": "GBIS" - } - - ha_analysis_results = [] - total_revenue_results = [] - for ha_name, datasets in outputs.items(): - inputs = [x for k, x in loader.data.items() if k == ha_name][0] - - results_df = datasets["results_df"].copy() - - analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename( - columns={"row_meaning": "asset_identification_status"} - ).merge( - results_df, - how="left", - right_on="row_id", - left_on="asset_list_row_id" - ) - - analysis_data["is_remaining"] = True - - n_sold_eco4 = 0 - n_sold_gbis = 0 - if not inputs["survey_list"].empty: - # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had - # a survey) - survey_list = inputs["survey_list"].copy() - - # TODO: TEMP - scheme_column = survey_list.columns[0] - # We clean up the survey list installation or cancelled - survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower() - # Remove all punctuation - survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( - r'[^\w\s]', '', regex=True - ) - # Remove double spaces - survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( - r'\s+', ' ', regex=True - ) - # Remove trailing spaces - survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() - - # Remap the values in the scheme column - survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map) - - survey_list["installation_status"] = None - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]), - "installed", - survey_list["installation_status"] - ) - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].isin(["cancelled"]), - "cancelled", - survey_list["installation_status"] - ) - # Find partial installations - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), - "partially installed", - survey_list["installation_status"] - ) - # Find partial cancellations - # TODO: We might have more indications of partial cancellations - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), - "partially cancelled", - survey_list["installation_status"] - ) - - # Finally, for other cases, we set the status to "in progress" - survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress") - - # We concatenate the scheme name with the installation status - survey_list["installation_status"] = ( - survey_list[scheme_column] + " - " + survey_list["installation_status"] - ) - - # TODO: END TEMP - - survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy() - survey_list_to_merge["is_remaining"] = False - analysis_data = analysis_data.drop(columns="is_remaining").merge( - survey_list_to_merge, - how="left", on="asset_list_row_id" - ) - analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True) - - n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0] - n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0] - - # Take just remaining - analysis_data = analysis_data[analysis_data["is_remaining"]] - - # Also, if the HA has started selling, we remove any that are still subject to ciga - n_eco4_missed_subject_to_ciga = 0 - if not inputs["survey_list"].empty: - n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum() - analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"] - - ################################################################################################ - # We take the properties that strictly qualified under eco - ################################################################################################ - - eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy() - eco4_identified["identification_type"] = None - eco4_identified["identification_type"] = np.where( - (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True), - "strict", - eco4_identified["identification_type"] - ) - - # For expansive, the property can be no higher than an EPC C - eco4_identified["identification_type"] = np.where( - (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & ( - eco4_identified["sap"] <= epc_c_threshold - ), - "expansive", - eco4_identified["identification_type"] - ) - ################################################################################################ - # We take the properties dependent on CIGA - ################################################################################################ - - ciga_dependent_identified = analysis_data[ - analysis_data["ECO Eligibility"].isin( - [ - "eco4 (subject to ciga)", - "eco4 - passed ciga" - ] - ) - ].copy() - - # These are properties that show filled cavity - ciga_dependent_identified["identification_type"] = None - ciga_dependent_identified["identification_type"] = np.where( - ciga_dependent_identified["eco4_message"].isin( - [ - "Perfect suitability", - "Meets cavity and sap", - "Fails cavity, meets loft, fails SAP", - "Meets fabric, fails SAP check", - "Meets cavity, loft borderline, meets sap", - ] - ) & (ciga_dependent_identified["sap"] <= epc_c_threshold), - "strict", - ciga_dependent_identified["identification_type"] - ) - - ciga_dependent_identified["identification_type"] = np.where( - ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | ( - ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"]) - )) & ( - (ciga_dependent_identified["sap"] <= epc_c_threshold) & - pd.isnull(ciga_dependent_identified["identification_type"]) - ), - "expansive", - ciga_dependent_identified["identification_type"] - ) - - ################################################################################################ - # We properties that qualified for gbis - ################################################################################################ - gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy() - gbis_identified["identification_type"] = None - gbis_identified["identification_type"] = np.where( - (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69), - "strict", - gbis_identified["identification_type"] - ) - - gbis_identified["identification_type"] = np.where( - (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & ( - pd.isnull(gbis_identified["identification_type"]) - ), - "expansive", - gbis_identified["identification_type"] - ) - - # Finally, we look at the properties that have not been identified by Warmfront - not_identified = analysis_data[ - analysis_data["ECO Eligibility"].isin( - [ - "not eligible" - ] - ) - ].copy() - - surplus_eco4 = not_identified[ - (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin( - ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"] - )) - ] - - surplus_gbis = not_identified[ - (not_identified["gbis_eligible"] == True) & ( - ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values) - ) & (not_identified["sap"] < 69) & ( - (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | ( - not_identified["walls"].str.contains("partial", case=False, na=False) - ) - ) - ] - surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False] - - # Output variables - the data was sent to us in December, but the remaining figures are - # what was in November - november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name] - - # ECO4 - n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0] - november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0) - november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0] - eco4_sales_since_november = n_sold_eco4 - november_eco4_sold - - n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0] - eco4_of_which_identified_strict = ( - eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] + - ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0] - ) - eco4_of_which_identified_expansive = ( - eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] + - ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0] - ) - # GBIS - n_warmfront_identified_gbis = gbis_identified.shape[0] - november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0) - november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0] - gbis_sales_since_november = n_sold_gbis - november_gbis_sold - gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0] - gbis_of_which_identified_expansive = \ - gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0] - - to_append = { - ("", "HA Name"): ha_name, - ("", "# properties in asset list"): n_properties_remaining_in_asset_list, - ############ - # ECO4 - ############ - ("ECO4", "# remaining November file"): november_eco4_remaining, - ("ECO4", "# sold in November file"): november_eco4_sold, - ("ECO4", "# sold (survey list)"): n_sold_eco4, - ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga, - ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4, - ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict, - ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive, - ("ECO4", "Of which identified by model - total"): ( - eco4_of_which_identified_strict + eco4_of_which_identified_expansive - ), - ("ECO4", "Additional properties"): surplus_eco4.shape[0], - ############ - # GBIS - ############ - ("GBIS", "# remaining November file"): november_gbis_remaining, - ("GBIS", "# sold in November file"): november_gbis_sold, - ("GBIS", "# sold (survey list)"): n_sold_gbis, - ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis, - ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict, - ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive, - ("GBIS", "Of which identified by model - total"): ( - gbis_of_which_identified_strict + gbis_of_which_identified_expansive - ), - ("GBIS", "Additional properties"): surplus_gbis.shape[0] - } - - ha_analysis_results.append(to_append) - - # Calculate the revenue results - to_append_revenue = { - ("", "HA Name"): ha_name, - # Eco4 revenue - ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate, - ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate, - ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate, - ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate, - ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate, - ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate, - ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate, - ("ECO4", "Of which identified by model - total"): eco4_rate * ( - eco4_of_which_identified_strict + eco4_of_which_identified_expansive - ), - ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0], - } - total_revenue_results.append(to_append_revenue) - - ha_analysis_results = pd.DataFrame(ha_analysis_results) - ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns) - - facts_and_figures = loader.facts_and_figures.copy() - facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int) - facts_and_figures = facts_and_figures.sort_values("ha_number") - facts_and_figures = facts_and_figures.drop(columns=["ha_number"]) - - # Rename some of the cols - facts_and_figures = facts_and_figures.rename( - columns={ - # ECO4 cols - "ECO4": "ECO4 - November", - "GBIS": "GBIS - November", - "eco4 (subject to ciga)": "ECO4 - subject to ciga", - "eco4": "ECO4 - doesn't need CIGA", - "eco4 - passed ciga": "ECO4 - passed CIGA", - "failed ciga": "ECO4 - failed CIGA", - "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS", - "ECO4 - in progress": "ECO4 - Install in progress", - "ECO4 - cancelled": "ECO4 - Install cancelled", - # GBIS cols - "gbis": "GBIS total (asset list)" - } - ) - # We calculate the eco4 total from the asset list - # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is - # ECO4 - doesn't need CIGA + ECO4 - passed CIGA - # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is - # ECO4 - doesn't need CIGA + ECO4 - subject to ciga - facts_and_figures["ECO4 total (asset list - pre ciga)"] = ( - facts_and_figures["ECO4 - doesn't need CIGA"] + - facts_and_figures["ECO4 - subject to ciga"] + - facts_and_figures["ECO4 - passed CIGA"] - ) - - facts_and_figures["ECO4 total (asset list - post ciga)"] = None - facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where( - facts_and_figures["ECO4 - passed CIGA"] > 0, - facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"], - facts_and_figures["ECO4 total (asset list - post ciga)"] - ) - - # Re-arrange the columns - facts_and_figures = facts_and_figures[ - [ - 'HA Name', - 'ECO4 - November', - 'GBIS - November', - 'ECO4 total (asset list - pre ciga)', - 'ECO4 total (asset list - post ciga)', - 'GBIS total (asset list)', - 'ECO4 - subject to ciga', - "ECO4 - doesn't need CIGA", - 'ECO4 - passed CIGA', - 'ECO4 - failed CIGA', - 'ECO4 - installed', - 'ECO4 - Install in progress', - 'ECO4 - Install cancelled', - 'ECO4 - partially installed', - 'ECO4 - Install downgrade to GBIS', - ] - ] - # Addd a note to flag any rows where ECO4 ( - # subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0 - # ) - facts_and_figures["Missed CIGA checks opportunity"] = None - facts_and_figures["Missed CIGA checks opportunity"] = np.where( - (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0), - "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype( - str) + " ECO4 properties needing a CIGA check", - facts_and_figures["Missed CIGA checks opportunity"] - ) - - facts_and_figures.to_csv("Facts and figures sample.csv") - - # Re arrage the columns - - # Also sort ha_analysis_results by ha number - ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int) - ha_analysis_results = ha_analysis_results.sort_values("ha_number") - ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"]) - - # We save 2 sheets - # Automate creation of the excel - # Create a Pandas Excel writer using XlsxWriter as the engine - with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer: - # Write each dataframe to a different worksheet without the index - for df, sheet in [(facts_and_figures, 'HA Facts and Figures'), - (ha_analysis_results, 'Asset Identification')]: - - df.to_excel(writer, sheet_name=sheet) - - # Auto-adjust columns' width - for i, width in enumerate(get_col_widths(df)): - writer.sheets[sheet].set_column(i, i, width) - - # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their - # description, and what proportion of time they get identified via non-invasive surveys - - # true_eco4_assets = [] - # ciga_dependent_assets = [] - # not_eligible = [] - # as_built_insulated = [] - # date_cols = { - # "HA39": "date_built", - # "HA14": "Built In Year", - # "HA6": "Construction Year", - # "HA1": "Build Date", - # "HA107": "YEAR BUILT" - # } - # for ha_name, data_objects in outputs.items(): - # inputs = [x for k, x in loader.data.items() if k == ha_name][0] - # - # date_col = date_cols[ha_name] - # results_df = data_objects["results_df"].copy() - # df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename( - # columns={"row_meaning": "asset_identification_status", date_col: "date_built"} - # ).merge( - # results_df, - # how="left", - # right_on="row_id", - # left_on="asset_list_row_id" - # ) - # - # # take the true ECO4 - # true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy() - # ciga_dependent = df[ - # df["ECO Eligibility"].isin( - # [ - # "eco4 (subject to ciga)", - # "failed ciga", - # "eco4 - passed ciga" - # ] - # ) - # ] - # insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy() - # # We convert date built to datetime - # try: - # insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])] - # insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year - # as_built_insulated.append(insulated_assumed) - # except Exception as e: - # print("oh well") - # - # true_eco4_assets.append(true_eco4) - # ciga_dependent_assets.append(ciga_dependent) - # - # true_eco4_assets = pd.concat(true_eco4_assets) - # ciga_dependent_assets = pd.concat(ciga_dependent_assets) - # as_built_insulated = pd.concat(as_built_insulated) - # - # true_eco4_assets["walls"].value_counts(normalize=True) - # ciga_dependent_assets["walls"].value_counts(normalize=True) - # - # from recommendations.recommendation_utils import extract_insulation_thickness - # - # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply( - # lambda x: extract_insulation_thickness(x) - # ) - # - # true_eco4_assets["e"] = true_eco4_assets.merge( - # pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]], - # how="left", - # left_on="roof", - # right_on="original_description" - # ) - # - # true_eco4_assets["sap"].mean() - # - # true_eco4_assets["insulation_thickness"].isin( - # ["250", "150", "200", "100", "75", "50"] - # ).sum() / true_eco4_assets.shape[0] - # - # true_eco4_assets["insulation_thickness"].isin( - # ["100"] - # ).sum() / true_eco4_assets.shape[0] - # - # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True) +# def analyse_ha_data(outputs, loader): +# """ +# The approach we take within this function is the following: +# For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The +# characterisation can be broken down as the following: +# 1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria +# 2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to +# a CIGA check +# 3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft +# insulation +# 4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under +# any cirsumstances, given the available data +# +# Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would +# qualify under the strictest criteria, and mark these as potential additional opportunities. +# +# :return: +# """ +# +# eco4_rate = 1710 +# gbis_rate = 600 +# # old_eco4_rate = 1456 +# old_gbis_rate = 432 +# +# epc_c_threshold = 80 +# scheme_map = { +# "ECO4": "ECO4", +# "AFFORDABLE WARMTH": "ECO4", +# "ECO4 A/W": "ECO4", +# "ECO4 GBIS (ECO+)": "GBIS" +# } +# +# ha_analysis_results = [] +# total_revenue_results = [] +# for ha_name, datasets in outputs.items(): +# inputs = [x for k, x in loader.data.items() if k == ha_name][0] +# +# results_df = datasets["results_df"].copy() +# +# analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename( +# columns={"row_meaning": "asset_identification_status"} +# ).merge( +# results_df, +# how="left", +# right_on="row_id", +# left_on="asset_list_row_id" +# ) +# +# analysis_data["is_remaining"] = True +# +# n_sold_eco4 = 0 +# n_sold_gbis = 0 +# if not inputs["survey_list"].empty: +# # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had +# # a survey) +# survey_list = inputs["survey_list"].copy() +# +# # TODO: TEMP +# scheme_column = survey_list.columns[0] +# # We clean up the survey list installation or cancelled +# survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower() +# # Remove all punctuation +# survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( +# r'[^\w\s]', '', regex=True +# ) +# # Remove double spaces +# survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( +# r'\s+', ' ', regex=True +# ) +# # Remove trailing spaces +# survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() +# +# # Remap the values in the scheme column +# survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map) +# +# survey_list["installation_status"] = None +# survey_list["installation_status"] = np.where( +# survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]), +# "installed", +# survey_list["installation_status"] +# ) +# survey_list["installation_status"] = np.where( +# survey_list["installed_or_cancelled_clean"].isin(["cancelled"]), +# "cancelled", +# survey_list["installation_status"] +# ) +# # Find partial installations +# survey_list["installation_status"] = np.where( +# survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), +# "partially installed", +# survey_list["installation_status"] +# ) +# # Find partial cancellations +# # TODO: We might have more indications of partial cancellations +# survey_list["installation_status"] = np.where( +# survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), +# "partially cancelled", +# survey_list["installation_status"] +# ) +# +# # Finally, for other cases, we set the status to "in progress" +# survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress") +# +# # We concatenate the scheme name with the installation status +# survey_list["installation_status"] = ( +# survey_list[scheme_column] + " - " + survey_list["installation_status"] +# ) +# +# # TODO: END TEMP +# +# survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy() +# survey_list_to_merge["is_remaining"] = False +# analysis_data = analysis_data.drop(columns="is_remaining").merge( +# survey_list_to_merge, +# how="left", on="asset_list_row_id" +# ) +# analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True) +# +# n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0] +# n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0] +# +# # Take just remaining +# analysis_data = analysis_data[analysis_data["is_remaining"]] +# +# # Also, if the HA has started selling, we remove any that are still subject to ciga +# n_eco4_missed_subject_to_ciga = 0 +# if not inputs["survey_list"].empty: +# n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum() +# analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"] +# +# ################################################################################################ +# # We take the properties that strictly qualified under eco +# ################################################################################################ +# +# eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy() +# eco4_identified["identification_type"] = None +# eco4_identified["identification_type"] = np.where( +# (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True), +# "strict", +# eco4_identified["identification_type"] +# ) +# +# # For expansive, the property can be no higher than an EPC C +# eco4_identified["identification_type"] = np.where( +# (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & ( +# eco4_identified["sap"] <= epc_c_threshold +# ), +# "expansive", +# eco4_identified["identification_type"] +# ) +# ################################################################################################ +# # We take the properties dependent on CIGA +# ################################################################################################ +# +# ciga_dependent_identified = analysis_data[ +# analysis_data["ECO Eligibility"].isin( +# [ +# "eco4 (subject to ciga)", +# "eco4 - passed ciga" +# ] +# ) +# ].copy() +# +# # These are properties that show filled cavity +# ciga_dependent_identified["identification_type"] = None +# ciga_dependent_identified["identification_type"] = np.where( +# ciga_dependent_identified["eco4_message"].isin( +# [ +# "Perfect suitability", +# "Meets cavity and sap", +# "Fails cavity, meets loft, fails SAP", +# "Meets fabric, fails SAP check", +# "Meets cavity, loft borderline, meets sap", +# ] +# ) & (ciga_dependent_identified["sap"] <= epc_c_threshold), +# "strict", +# ciga_dependent_identified["identification_type"] +# ) +# +# ciga_dependent_identified["identification_type"] = np.where( +# ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | ( +# ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"]) +# )) & ( +# (ciga_dependent_identified["sap"] <= epc_c_threshold) & +# pd.isnull(ciga_dependent_identified["identification_type"]) +# ), +# "expansive", +# ciga_dependent_identified["identification_type"] +# ) +# +# ################################################################################################ +# # We properties that qualified for gbis +# ################################################################################################ +# gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy() +# gbis_identified["identification_type"] = None +# gbis_identified["identification_type"] = np.where( +# (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69), +# "strict", +# gbis_identified["identification_type"] +# ) +# +# gbis_identified["identification_type"] = np.where( +# (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & ( +# pd.isnull(gbis_identified["identification_type"]) +# ), +# "expansive", +# gbis_identified["identification_type"] +# ) +# +# # Finally, we look at the properties that have not been identified by Warmfront +# not_identified = analysis_data[ +# analysis_data["ECO Eligibility"].isin( +# [ +# "not eligible" +# ] +# ) +# ].copy() +# +# surplus_eco4 = not_identified[ +# (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin( +# ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"] +# )) +# ] +# +# surplus_gbis = not_identified[ +# (not_identified["gbis_eligible"] == True) & ( +# ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values) +# ) & (not_identified["sap"] < 69) & ( +# (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | ( +# not_identified["walls"].str.contains("partial", case=False, na=False) +# ) +# ) +# ] +# surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False] +# +# # Output variables - the data was sent to us in December, but the remaining figures are +# # what was in November +# november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name] +# +# # ECO4 +# n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0] +# november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0) +# november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0] +# eco4_sales_since_november = n_sold_eco4 - november_eco4_sold +# +# n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0] +# eco4_of_which_identified_strict = ( +# eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] + +# ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0] +# ) +# eco4_of_which_identified_expansive = ( +# eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] + +# ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0] +# ) +# # GBIS +# n_warmfront_identified_gbis = gbis_identified.shape[0] +# november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0) +# november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0] +# gbis_sales_since_november = n_sold_gbis - november_gbis_sold +# gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0] +# gbis_of_which_identified_expansive = \ +# gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0] +# +# to_append = { +# ("", "HA Name"): ha_name, +# ("", "# properties in asset list"): n_properties_remaining_in_asset_list, +# ############ +# # ECO4 +# ############ +# ("ECO4", "# remaining November file"): november_eco4_remaining, +# ("ECO4", "# sold in November file"): november_eco4_sold, +# ("ECO4", "# sold (survey list)"): n_sold_eco4, +# ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga, +# ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4, +# ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict, +# ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive, +# ("ECO4", "Of which identified by model - total"): ( +# eco4_of_which_identified_strict + eco4_of_which_identified_expansive +# ), +# ("ECO4", "Additional properties"): surplus_eco4.shape[0], +# ############ +# # GBIS +# ############ +# ("GBIS", "# remaining November file"): november_gbis_remaining, +# ("GBIS", "# sold in November file"): november_gbis_sold, +# ("GBIS", "# sold (survey list)"): n_sold_gbis, +# ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis, +# ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict, +# ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive, +# ("GBIS", "Of which identified by model - total"): ( +# gbis_of_which_identified_strict + gbis_of_which_identified_expansive +# ), +# ("GBIS", "Additional properties"): surplus_gbis.shape[0] +# } +# +# ha_analysis_results.append(to_append) +# +# # Calculate the revenue results +# to_append_revenue = { +# ("", "HA Name"): ha_name, +# # Eco4 revenue +# ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate, +# ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate, +# ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate, +# ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate, +# ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate, +# ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate, +# ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate, +# ("ECO4", "Of which identified by model - total"): eco4_rate * ( +# eco4_of_which_identified_strict + eco4_of_which_identified_expansive +# ), +# ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0], +# } +# total_revenue_results.append(to_append_revenue) +# +# ha_analysis_results = pd.DataFrame(ha_analysis_results) +# ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns) +# +# facts_and_figures = loader.facts_and_figures.copy() +# facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int) +# facts_and_figures = facts_and_figures.sort_values("ha_number") +# facts_and_figures = facts_and_figures.drop(columns=["ha_number"]) +# +# # Rename some of the cols +# facts_and_figures = facts_and_figures.rename( +# columns={ +# # ECO4 cols +# "ECO4": "ECO4 - November", +# "GBIS": "GBIS - November", +# "eco4 (subject to ciga)": "ECO4 - subject to ciga", +# "eco4": "ECO4 - doesn't need CIGA", +# "eco4 - passed ciga": "ECO4 - passed CIGA", +# "failed ciga": "ECO4 - failed CIGA", +# "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS", +# "ECO4 - in progress": "ECO4 - Install in progress", +# "ECO4 - cancelled": "ECO4 - Install cancelled", +# # GBIS cols +# "gbis": "GBIS total (asset list)" +# } +# ) +# # We calculate the eco4 total from the asset list +# # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is +# # ECO4 - doesn't need CIGA + ECO4 - passed CIGA +# # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is +# # ECO4 - doesn't need CIGA + ECO4 - subject to ciga +# facts_and_figures["ECO4 total (asset list - pre ciga)"] = ( +# facts_and_figures["ECO4 - doesn't need CIGA"] + +# facts_and_figures["ECO4 - subject to ciga"] + +# facts_and_figures["ECO4 - passed CIGA"] +# ) +# +# facts_and_figures["ECO4 total (asset list - post ciga)"] = None +# facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where( +# facts_and_figures["ECO4 - passed CIGA"] > 0, +# facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"], +# facts_and_figures["ECO4 total (asset list - post ciga)"] +# ) +# +# # Re-arrange the columns +# facts_and_figures = facts_and_figures[ +# [ +# 'HA Name', +# 'ECO4 - November', +# 'GBIS - November', +# 'ECO4 total (asset list - pre ciga)', +# 'ECO4 total (asset list - post ciga)', +# 'GBIS total (asset list)', +# 'ECO4 - subject to ciga', +# "ECO4 - doesn't need CIGA", +# 'ECO4 - passed CIGA', +# 'ECO4 - failed CIGA', +# 'ECO4 - installed', +# 'ECO4 - Install in progress', +# 'ECO4 - Install cancelled', +# 'ECO4 - partially installed', +# 'ECO4 - Install downgrade to GBIS', +# ] +# ] +# # Addd a note to flag any rows where ECO4 ( +# # subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0 +# # ) +# facts_and_figures["Missed CIGA checks opportunity"] = None +# facts_and_figures["Missed CIGA checks opportunity"] = np.where( +# (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0), +# "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype( +# str) + " ECO4 properties needing a CIGA check", +# facts_and_figures["Missed CIGA checks opportunity"] +# ) +# +# facts_and_figures.to_csv("Facts and figures sample.csv") +# +# # Re arrage the columns +# +# # Also sort ha_analysis_results by ha number +# ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int) +# ha_analysis_results = ha_analysis_results.sort_values("ha_number") +# ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"]) +# +# # We save 2 sheets +# # Automate creation of the excel +# # Create a Pandas Excel writer using XlsxWriter as the engine +# with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer: +# # Write each dataframe to a different worksheet without the index +# for df, sheet in [(facts_and_figures, 'HA Facts and Figures'), +# (ha_analysis_results, 'Asset Identification')]: +# +# df.to_excel(writer, sheet_name=sheet) +# +# # Auto-adjust columns' width +# for i, width in enumerate(get_col_widths(df)): +# writer.sheets[sheet].set_column(i, i, width) +# +# # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their +# # description, and what proportion of time they get identified via non-invasive surveys +# +# # true_eco4_assets = [] +# # ciga_dependent_assets = [] +# # not_eligible = [] +# # as_built_insulated = [] +# # date_cols = { +# # "HA39": "date_built", +# # "HA14": "Built In Year", +# # "HA6": "Construction Year", +# # "HA1": "Build Date", +# # "HA107": "YEAR BUILT" +# # } +# # for ha_name, data_objects in outputs.items(): +# # inputs = [x for k, x in loader.data.items() if k == ha_name][0] +# # +# # date_col = date_cols[ha_name] +# # results_df = data_objects["results_df"].copy() +# # df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename( +# # columns={"row_meaning": "asset_identification_status", date_col: "date_built"} +# # ).merge( +# # results_df, +# # how="left", +# # right_on="row_id", +# # left_on="asset_list_row_id" +# # ) +# # +# # # take the true ECO4 +# # true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy() +# # ciga_dependent = df[ +# # df["ECO Eligibility"].isin( +# # [ +# # "eco4 (subject to ciga)", +# # "failed ciga", +# # "eco4 - passed ciga" +# # ] +# # ) +# # ] +# # insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy() +# # # We convert date built to datetime +# # try: +# # insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])] +# # insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year +# # as_built_insulated.append(insulated_assumed) +# # except Exception as e: +# # print("oh well") +# # +# # true_eco4_assets.append(true_eco4) +# # ciga_dependent_assets.append(ciga_dependent) +# # +# # true_eco4_assets = pd.concat(true_eco4_assets) +# # ciga_dependent_assets = pd.concat(ciga_dependent_assets) +# # as_built_insulated = pd.concat(as_built_insulated) +# # +# # true_eco4_assets["walls"].value_counts(normalize=True) +# # ciga_dependent_assets["walls"].value_counts(normalize=True) +# # +# # from recommendations.recommendation_utils import extract_insulation_thickness +# # +# # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply( +# # lambda x: extract_insulation_thickness(x) +# # ) +# # +# # true_eco4_assets["e"] = true_eco4_assets.merge( +# # pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]], +# # how="left", +# # left_on="roof", +# # right_on="original_description" +# # ) +# # +# # true_eco4_assets["sap"].mean() +# # +# # true_eco4_assets["insulation_thickness"].isin( +# # ["250", "150", "200", "100", "75", "50"] +# # ).sum() / true_eco4_assets.shape[0] +# # +# # true_eco4_assets["insulation_thickness"].isin( +# # ["100"] +# # ).sum() / true_eco4_assets.shape[0] +# # +# # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True) def get_propensity_model_data( @@ -2567,29 +2567,39 @@ def calculate_eco4_post_ciga( eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga" ]["count"].sum() + eco4_confirmed_ciga_failures = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "failed ciga" + ]["count"].sum() + eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) eco4_confirmed = np.round(eco4_confirmed) if remaining_needing_ciga_check > 0: # We update the eco4 post ciga with the converted remaining + eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) eco4_remaining_forecast = np.round( - remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate + eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate ) + eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast else: eco4_remaining_forecast = 0 + eco4_estimated_ciga_failures = 0 eco4_post_ciga = eco4_confirmed else: eco4_no_ciga_needed = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "eco4" ]["count"].sum() + eco4_confirmed_ciga_failures = 0 + # Multiply by sale conversion eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate) + eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass + eco4_remaining_forecast = np.round( - remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate - ) - eco4_post_ciga = ( - eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast + eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate ) + eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast eco4_post_ciga = int(eco4_post_ciga) eco4_remaining_forecast = int(eco4_remaining_forecast) @@ -2604,6 +2614,16 @@ def calculate_eco4_post_ciga( "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate, "Of which confirmed - £": eco4_confirmed * eco4_rate, "Of which forecast - £": eco4_remaining_forecast * eco4_rate, + # Ciga failures + "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures), + "Confirmed CIGA failures": eco4_confirmed_ciga_failures, + "Estimated CIGA failures": int(eco4_estimated_ciga_failures), + # Ciga failures cost + "Estimated total - failed CIGA - £": int( + (eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures) * eco4_rate + ), + "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate), + "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate), } return results @@ -2617,8 +2637,8 @@ def forecast_remaining_sales(loader): gbis_rate = 600 eco4_rate = 1710 - old_gbis_rate = 432 - old_eco4_rate = 1456 + # old_gbis_rate = 432 + # old_eco4_rate = 1456 # 1) Calculate the conversion rate from passed CIGA to actual sale converted_ciga_jobs = [] @@ -2800,16 +2820,18 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): + # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0] original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0] - original_warmfront_eco4_revenue = ( - original_warmfront_remaining_eco4 * eco4_rate + - (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate - ) + # original_warmfront_eco4_revenue = ( + # original_warmfront_remaining_eco4 * eco4_rate + + # (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate + # ) + original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate # Original warmfront figures - GBIS @@ -2817,9 +2839,12 @@ def forecast_remaining_sales(loader): original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0] original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0] + # original_warmfront_gbis_revenue = ( + # original_warmfront_remaining_gbis * gbis_rate + + # (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate + # ) original_warmfront_gbis_revenue = ( - original_warmfront_remaining_gbis * gbis_rate + - (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate + original_warmfront_gbis * gbis_rate ) original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate @@ -2835,6 +2860,7 @@ def forecast_remaining_sales(loader): how="left", on="asset_list_row_id" ) + # Anything that has an installation has gone to installation, and therefore is not remaining asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) @@ -2913,6 +2939,32 @@ def forecast_remaining_sales(loader): eco4_rate=eco4_rate ) + # GBIS Figures + # Estimate the GBIS conversion rate + ha_gbis_sale_conversion = gbis_ciga_independent_passrates[ + gbis_ciga_independent_passrates["Ha Name"] == ha_name + ] + + if not ha_gbis_sale_conversion.empty: + ha_gbis_sale_conversion = ( + ha_gbis_sale_conversion["# GBIS successfully installed"].values[0] / + ha_gbis_sale_conversion["# GBIS at install stage"].values[0] + ) + else: + ha_gbis_sale_conversion = median_gbis_to_install + + gbis_total = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "gbis" + ]["count"].sum() + gbis_total = np.round(gbis_total * ha_gbis_sale_conversion) + gbis_total_revenue = gbis_total * gbis_rate + + gbis_remaining = eligiblity_counts_remaining[ + eligiblity_counts["ECO Eligibility"] == "gbis" + ]["count"].sum() + gbis_remaining = np.round(gbis_remaining * ha_gbis_sale_conversion) + gbis_remaining_revenue = gbis_remaining * gbis_rate + to_append = { ("", "", "", "HA Name"): ha_name, # ECO4 - original warmfront figures @@ -2950,16 +3002,48 @@ def forecast_remaining_sales(loader): ("ECO4 post-ciga", "", "Of which forecast - £", ""): eco4_post_ciga_remaining_results["Of which forecast - £"], # CIGA failures + ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[ + 'Estimated total - failed CIGA' + ], + ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - £", ""): eco4_post_ciga_remaining_results[ + 'Estimated total - failed CIGA - £' + ], + ("ECO4 CIGA failures", "", "Confirmed failures - #", ""): eco4_post_ciga_remaining_results[ + "Confirmed CIGA failures" + ], + ("ECO4 CIGA failures", "", "Confirmed failures - £", ""): eco4_post_ciga_remaining_results[ + "Confirmed CIGA failures - £" + ], + ("ECO4 CIGA failures", "", "Estimated failures - #", ""): eco4_post_ciga_remaining_results[ + "Estimated CIGA failures" + ], + ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[ + "Estimated CIGA failures - £" + ], + # GBIS postcode list + ("", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, + ("", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, + ("", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, + ("", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 22: + if len(to_append) != 32: raise ValueError("Something went wrong") results.append(to_append) results = pd.DataFrame(results) + # TODO: Add a blank row and then a total row + + assumptions = { + "ECO4 new rate": eco4_rate, + "GBIS new rate": gbis_rate, + # "ECO4 old rate": old_eco4_rate, + # "GBIS old rate": old_gbis_rate, + } + def app(): """ From 2ba37d55e65a746fdb58588aa2768851a83a3887 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 20:06:57 +0000 Subject: [PATCH 046/248] Added assumptions table --- .../ha_15_32/ha_analysis_batch_3.py | 45 ++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 191ca74c..ac4d3a0c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2956,14 +2956,14 @@ def forecast_remaining_sales(loader): gbis_total = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "gbis" ]["count"].sum() - gbis_total = np.round(gbis_total * ha_gbis_sale_conversion) - gbis_total_revenue = gbis_total * gbis_rate + gbis_total = int(np.round(gbis_total * ha_gbis_sale_conversion)) + gbis_total_revenue = int(gbis_total * gbis_rate) gbis_remaining = eligiblity_counts_remaining[ eligiblity_counts["ECO Eligibility"] == "gbis" ]["count"].sum() - gbis_remaining = np.round(gbis_remaining * ha_gbis_sale_conversion) - gbis_remaining_revenue = gbis_remaining * gbis_rate + gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion)) + gbis_remaining_revenue = int(gbis_remaining * gbis_rate) to_append = { ("", "", "", "HA Name"): ha_name, @@ -3037,12 +3037,37 @@ def forecast_remaining_sales(loader): # TODO: Add a blank row and then a total row - assumptions = { - "ECO4 new rate": eco4_rate, - "GBIS new rate": gbis_rate, - # "ECO4 old rate": old_eco4_rate, - # "GBIS old rate": old_gbis_rate, - } + assumptions = [ + { + ("", "", "", "HA Name"): "ECO4 rate", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate) + }, + { + ("", "", "", "HA Name"): "GBIS rate", + ("ECO4 original", "", "Remaining - #", ""): "£" + str(gbis_rate) + }, + { + ("", "", "", "HA Name"): "Median CIGA pass rate", + ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_success_rate * 100, 1)) + "%", + }, + { + ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate", + ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%", + ("ECO4 original", "", "Remaining - £", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks" + }, + { + ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate", + ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%", + ("ECO4 original", "", "Remaining - £", + ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check" + }, + { + ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate", + ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%", + ("ECO4 original", "", "Remaining - £", + ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check" + } + ] def app(): From 57a7edf62511207f7d7af176414b5b269f3b1aa1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 20:18:44 +0000 Subject: [PATCH 047/248] collating results --- .../ha_15_32/ha_analysis_batch_3.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ac4d3a0c..7da6bb3a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3035,9 +3035,21 @@ def forecast_remaining_sales(loader): results = pd.DataFrame(results) - # TODO: Add a blank row and then a total row + totals_row = {} + for col in results.columns: + if col == ('', '', '', 'HA Name'): + totals_row[col] = "Total" + elif col == ("ECO4 post-ciga", "", "Delta vs original estimate", ""): + totals_row[col] = results[col].mean() + else: + totals_row[col] = results[col].sum() + + blank_row = pd.DataFrame([{col: "" for col in results.columns}]) assumptions = [ + { + ("", "", "", "HA Name"): "Assumptions", + }, { ("", "", "", "HA Name"): "ECO4 rate", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate) @@ -3059,16 +3071,20 @@ def forecast_remaining_sales(loader): ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate", ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%", ("ECO4 original", "", "Remaining - £", - ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check" + ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel" }, { ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate", ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%", ("ECO4 original", "", "Remaining - £", - ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check" + ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel" } ] + results = pd.concat( + [results, pd.DataFrame([totals_row]), blank_row, blank_row, pd.DataFrame(assumptions)] + ) + def app(): """ From 028c2edce7ab951987379a7c653324e5863426ae Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 20:48:43 +0000 Subject: [PATCH 048/248] Added headlines --- .../ha_15_32/ha_analysis_batch_3.py | 129 +++++++++++++++++- 1 file changed, 126 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7da6bb3a..1c320f9c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2991,7 +2991,7 @@ def forecast_remaining_sales(loader): # ECO4 - asset list, post ciga, remaining ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[ "ECO4 - post CIGA - #"], - ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_results[ + ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[ "ECO4 - post CIGA - £"], ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""): eco4_post_ciga_remaining_results["Of which confirmed - #"], @@ -3046,6 +3046,126 @@ def forecast_remaining_sales(loader): blank_row = pd.DataFrame([{col: "" for col in results.columns}]) + # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals + + # ECO4 Headlines + headline_eco4_original_remaining = totals_row[("ECO4 original", "", "Remaining - #", "")] + headline_eco4_original_remaining_revenue = totals_row[("ECO4 original", "", "Remaining - £", "")] + headline_eco4_postcode_list_remaining = totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")] + headline_eco4_postcode_list_remaining_revenue = totals_row[ + ("ECO4 post-ciga", "", "Estimated remaining eligible - £", "") + ] + headline_eco4_delta = 100 * ( + (headline_eco4_postcode_list_remaining - headline_eco4_original_remaining) / + headline_eco4_original_remaining + ) + headline_eco4_delta = round(headline_eco4_delta, 1) + + # GBIS Headlines + headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")] + headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")] + headline_gbis_postcode_list_remaining = totals_row[("", "Warmfront post code list", "Remaining - #", "GBIS total")] + headline_gbis_postcode_list_remaining_revenue = totals_row[ + ("", "Warmfront post code list", "Remaining - £", "GBIS total") + ] + headline_gbis_delta = 100 * ( + (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) / + headline_gbis_original_remaining + ) + headline_gbis_delta = round(headline_gbis_delta, 1) + + headline_original_total_revenue_remaining = ( + headline_eco4_original_remaining_revenue + headline_gbis_original_remaining_revenue + ) + + headline_postcode_list_total_revenue_remaining = ( + headline_eco4_postcode_list_remaining_revenue + headline_gbis_postcode_list_remaining_revenue + ) + headline_total_delta = 100 * ( + (headline_postcode_list_total_revenue_remaining - headline_original_total_revenue_remaining) / + headline_original_total_revenue_remaining + ) + headline_total_delta = round(headline_total_delta, 1) + + headlines = [ + { + ("", "", "", "HA Name"): "Headlines", + }, + { + ("", "", "", "HA Name"): "ECO4 Remaining - November - #", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + headline_eco4_original_remaining + ) + }, + { + ("", "", "", "HA Name"): "ECO4 Remaining - November - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( + headline_eco4_original_remaining_revenue + ) + }, + { + ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + headline_eco4_postcode_list_remaining + ) + }, + { + ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( + headline_eco4_postcode_list_remaining_revenue + ) + }, + { + ("", "", "", "HA Name"): "ECO4 delta %", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%" + }, + { + ("", "", "", "HA Name"): "GBIS Remaining - November - #", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + headline_gbis_original_remaining + ) + }, + { + ("", "", "", "HA Name"): "GBIS Remaining - November - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( + headline_gbis_original_remaining_revenue + ) + }, + { + ("", "", "", "HA Name"): "GBIS Remaining - post code list - #", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + headline_gbis_postcode_list_remaining + ) + }, + { + ("", "", "", "HA Name"): "GBIS Remaining - post code list - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( + headline_gbis_postcode_list_remaining_revenue + ) + }, + { + ("", "", "", "HA Name"): "GBIS delta %", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_gbis_delta) + "%" + }, + # Total revenue + { + ("", "", "", "HA Name"): "Total Remaining - November - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( + headline_original_total_revenue_remaining + ) + }, + { + ("", "", "", "HA Name"): "Total Remaining - post code list - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( + headline_postcode_list_total_revenue_remaining + ) + }, + { + ("", "", "", "HA Name"): "Total Remaining delta %", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_total_delta) + "%" + }, + ] + assumptions = [ { ("", "", "", "HA Name"): "Assumptions", @@ -3065,7 +3185,9 @@ def forecast_remaining_sales(loader): { ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate", ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%", - ("ECO4 original", "", "Remaining - £", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks" + ("ECO4 original", "", "Remaining - £", + ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be " + "conservative" }, { ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate", @@ -3082,7 +3204,8 @@ def forecast_remaining_sales(loader): ] results = pd.concat( - [results, pd.DataFrame([totals_row]), blank_row, blank_row, pd.DataFrame(assumptions)] + [results, pd.DataFrame([headlines]), pd.DataFrame([totals_row]), blank_row, blank_row, + pd.DataFrame(assumptions)] ) From 721bfb19fcc3bd70fe02081e14e4abde22f9a13e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 21:33:45 +0000 Subject: [PATCH 049/248] Added totals percentages aggregations --- .../ha_15_32/ha_analysis_batch_3.py | 74 ++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1c320f9c..3341e34c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2965,6 +2965,14 @@ def forecast_remaining_sales(loader): gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion)) gbis_remaining_revenue = int(gbis_remaining * gbis_rate) + # GBIS delta + if original_warmfront_gbis == 0: + gbis_delta_vs_original_estimate = 100 * gbis_total + else: + gbis_delta_vs_original_estimate = 100 * ( + gbis_total - original_warmfront_gbis + ) / original_warmfront_gbis + to_append = { ("", "", "", "HA Name"): ha_name, # ECO4 - original warmfront figures @@ -2987,7 +2995,7 @@ def forecast_remaining_sales(loader): "ECO4 - post CIGA - #"], ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[ "ECO4 - post CIGA - £"], - ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate, + ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate, # ECO4 - asset list, post ciga, remaining ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[ "ECO4 - post CIGA - #"], @@ -3021,14 +3029,15 @@ def forecast_remaining_sales(loader): "Estimated CIGA failures - £" ], # GBIS postcode list - ("", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, - ("", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, - ("", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, - ("", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, + ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, + ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, + ("GBIS Postcode list", "", "Delta vs original estimate - %", ""): gbis_delta_vs_original_estimate, + ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, + ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 32: + if len(to_append) != 33: raise ValueError("Something went wrong") results.append(to_append) @@ -3039,11 +3048,31 @@ def forecast_remaining_sales(loader): for col in results.columns: if col == ('', '', '', 'HA Name'): totals_row[col] = "Total" - elif col == ("ECO4 post-ciga", "", "Delta vs original estimate", ""): - totals_row[col] = results[col].mean() + elif col in [ + ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""), + ("GBIS Postcode list", "", "Delta vs original estimate - %", "") + ]: + totals_row[col] = None else: totals_row[col] = results[col].sum() + # For the delta columns, we calculate the delta on the totals + totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round( + 100 * ( + totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "")] - + totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")] + ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")], + 1 + ) + + totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = round( + 100 * ( + totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] - + totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")] + ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")], + 1 + ) + blank_row = pd.DataFrame([{col: "" for col in results.columns}]) # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals @@ -3204,10 +3233,35 @@ def forecast_remaining_sales(loader): ] results = pd.concat( - [results, pd.DataFrame([headlines]), pd.DataFrame([totals_row]), blank_row, blank_row, - pd.DataFrame(assumptions)] + [ + results, + pd.DataFrame([totals_row]), + pd.DataFrame(headlines), + blank_row, + blank_row, + pd.DataFrame(assumptions) + ] ) + # header_rows = [ + # [name[0] for name in results.columns.values], + # [name[1] for name in results.columns.values], + # [name[2] for name in results.columns.values], + # [name[3] for name in results.columns.values] + # ] + + # Step 2: Write the transformed header and DataFrame data to CSV. + # Open the file in write mode. + import csv + with open("HA Remaining Analysis.csv", "w", newline="") as file: + # writer = csv.writer(file) + + # Write the header rows. + # writer.writerows(header_rows) + + # Write the DataFrame data without the index (adjust if you want the index). + results.to_csv(file, header=True, index=False) + def app(): """ From f9d1a90689ef742fd32217b606c6a919b766d974 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 22:17:11 +0000 Subject: [PATCH 050/248] Fixing some formatting bugs --- .../ha_15_32/ha_analysis_batch_3.py | 86 +++++++++++-------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3341e34c..6309d2e2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2820,6 +2820,8 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): + if ha_name == "HA16": + dew # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] @@ -2991,8 +2993,9 @@ def forecast_remaining_sales(loader): ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, # ECO4 - asset list, post ciga, total - ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga_total_results[ - "ECO4 - post CIGA - #"], + ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)"): + eco4_post_ciga_total_results[ + "ECO4 - post CIGA - #"], ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[ "ECO4 - post CIGA - £"], ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate, @@ -3059,7 +3062,7 @@ def forecast_remaining_sales(loader): # For the delta columns, we calculate the delta on the totals totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round( 100 * ( - totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "")] - + totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] - totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")] ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")], 1 @@ -3093,9 +3096,11 @@ def forecast_remaining_sales(loader): # GBIS Headlines headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")] headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")] - headline_gbis_postcode_list_remaining = totals_row[("", "Warmfront post code list", "Remaining - #", "GBIS total")] + headline_gbis_postcode_list_remaining = totals_row[ + ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total") + ] headline_gbis_postcode_list_remaining_revenue = totals_row[ - ("", "Warmfront post code list", "Remaining - £", "GBIS total") + ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total") ] headline_gbis_delta = 100 * ( (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) / @@ -3205,29 +3210,33 @@ def forecast_remaining_sales(loader): }, { ("", "", "", "HA Name"): "GBIS rate", - ("ECO4 original", "", "Remaining - #", ""): "£" + str(gbis_rate) + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(gbis_rate) }, { ("", "", "", "HA Name"): "Median CIGA pass rate", - ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_success_rate * 100, 1)) + "%", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + round(median_ciga_success_rate * 100, 1)) + "%", }, { ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate", - ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%", - ("ECO4 original", "", "Remaining - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + round(maximum_ciga_conversion * 100, 1)) + "%", + ("ECO4 original", "", "Remaining - #", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be " "conservative" }, { ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate", - ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%", - ("ECO4 original", "", "Remaining - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + round(median_eco4_to_install * 100, 1)) + "%", + ("ECO4 original", "", "Remaining - #", ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel" }, { ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate", - ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%", - ("ECO4 original", "", "Remaining - £", + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( + round(median_ciga_pass_to_install * 100, 1)) + "%", + ("ECO4 original", "", "Remaining - #", ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel" } ] @@ -3236,6 +3245,7 @@ def forecast_remaining_sales(loader): [ results, pd.DataFrame([totals_row]), + blank_row, pd.DataFrame(headlines), blank_row, blank_row, @@ -3291,32 +3301,32 @@ def app(): loader.load() loader.ha_facts_and_figures() + forecast_remaining_sales(loader) + # We load in the additional data required to perform the analysis - cleaned = read_from_s3( - s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" - ) - cleaned = msgpack.unpackb(cleaned, raw=False) - cleaned = patch_cleaned(cleaned) - - cleaning_data = read_dataframe_from_s3_parquet( - bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", - ) - created_at = datetime.now().isoformat() - - photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") - - outputs = get_epc_data( - loader=loader, - cleaned=cleaned, - cleaning_data=cleaning_data, - created_at=created_at, - photo_supply_lookup=photo_supply_lookup, - floor_area_decile_thresholds=floor_area_decile_thresholds, - pull_data=pull_data - ) - - analyse_ha_data(outputs, loader) + # cleaned = read_from_s3( + # s3_file_name="cleaned_epc_data/cleaned.bson", + # bucket_name="retrofit-data-dev" + # ) + # cleaned = msgpack.unpackb(cleaned, raw=False) + # cleaned = patch_cleaned(cleaned) + # + # cleaning_data = read_dataframe_from_s3_parquet( + # bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + # ) + # created_at = datetime.now().isoformat() + # + # photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + # + # outputs = get_epc_data( + # loader=loader, + # cleaned=cleaned, + # cleaning_data=cleaning_data, + # created_at=created_at, + # photo_supply_lookup=photo_supply_lookup, + # floor_area_decile_thresholds=floor_area_decile_thresholds, + # pull_data=pull_data + # ) # import pickle # with open("ha_analysis.pickle", "wb") as f: From 0497290b7cac36b4519b3db4c0f9d1d1be4932b5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 22:17:51 +0000 Subject: [PATCH 051/248] removed temp code --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 6309d2e2..ec9469dc 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2820,8 +2820,6 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): - if ha_name == "HA16": - dew # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] From fbd808a54d3314d9821d5fad5456e951558959c9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 22:27:50 +0000 Subject: [PATCH 052/248] re-formatting percentages --- .../ha_15_32/ha_analysis_batch_3.py | 64 ++++++++----------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ec9469dc..0daf239b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2926,9 +2926,10 @@ def forecast_remaining_sales(loader): ) # Calculate the delta compared to Warmfront's original estimate - eco4_delta_vs_original_estimate = 100 * ( - eco4_post_ciga_total_results["ECO4 - post CIGA - #"] - original_warmfront_eco4 - ) / original_warmfront_eco4 + eco4_delta_vs_original_estimate = ( + eco4_post_ciga_total_results[ + "ECO4 - post CIGA - #"] - original_warmfront_eco4 + ) / original_warmfront_eco4 eco4_post_ciga_remaining_results = calculate_eco4_post_ciga( eligiblity_counts=eligiblity_counts_remaining, @@ -2967,11 +2968,11 @@ def forecast_remaining_sales(loader): # GBIS delta if original_warmfront_gbis == 0: - gbis_delta_vs_original_estimate = 100 * gbis_total + gbis_delta_vs_original_estimate = gbis_total else: - gbis_delta_vs_original_estimate = 100 * ( - gbis_total - original_warmfront_gbis - ) / original_warmfront_gbis + gbis_delta_vs_original_estimate = ( + gbis_total - original_warmfront_gbis + ) / original_warmfront_gbis to_append = { ("", "", "", "HA Name"): ha_name, @@ -3125,27 +3126,23 @@ def forecast_remaining_sales(loader): }, { ("", "", "", "HA Name"): "ECO4 Remaining - November - #", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( - headline_eco4_original_remaining - ) + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_original_remaining + }, { ("", "", "", "HA Name"): "ECO4 Remaining - November - £", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( - headline_eco4_original_remaining_revenue - ) + ( + "", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_eco4_original_remaining_revenue }, { ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( - headline_eco4_postcode_list_remaining - ) + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining }, { ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( - headline_eco4_postcode_list_remaining_revenue - ) + ("", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue }, { ("", "", "", "HA Name"): "ECO4 delta %", @@ -3153,27 +3150,22 @@ def forecast_remaining_sales(loader): }, { ("", "", "", "HA Name"): "GBIS Remaining - November - #", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( - headline_gbis_original_remaining - ) + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_original_remaining }, { ("", "", "", "HA Name"): "GBIS Remaining - November - £", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( - headline_gbis_original_remaining_revenue - ) + ( + "", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_gbis_original_remaining_revenue }, { ("", "", "", "HA Name"): "GBIS Remaining - post code list - #", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( - headline_gbis_postcode_list_remaining - ) + ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining }, { ("", "", "", "HA Name"): "GBIS Remaining - post code list - £", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( - headline_gbis_postcode_list_remaining_revenue - ) + ("", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_gbis_postcode_list_remaining_revenue }, { ("", "", "", "HA Name"): "GBIS delta %", @@ -3182,15 +3174,13 @@ def forecast_remaining_sales(loader): # Total revenue { ("", "", "", "HA Name"): "Total Remaining - November - £", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( - headline_original_total_revenue_remaining - ) + ("", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_original_total_revenue_remaining }, { ("", "", "", "HA Name"): "Total Remaining - post code list - £", - ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str( - headline_postcode_list_total_revenue_remaining - ) + ("", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_postcode_list_total_revenue_remaining }, { ("", "", "", "HA Name"): "Total Remaining delta %", From 46f5ee8ea43e719dc4f0c8c472de68b62d974270 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 22:34:02 +0000 Subject: [PATCH 053/248] formatting percentage --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 0daf239b..b5c6835b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3059,20 +3059,18 @@ def forecast_remaining_sales(loader): totals_row[col] = results[col].sum() # For the delta columns, we calculate the delta on the totals - totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round( - 100 * ( + totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = ( + ( totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] - totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")] - ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")], - 1 + ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")] ) - totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = round( - 100 * ( + totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = ( + ( totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] - totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")] - ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")], - 1 + ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")] ) blank_row = pd.DataFrame([{col: "" for col in results.columns}]) From d9e9be4389d371176a8f83ec5f83f0fcbabbeb8b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 23:48:27 +0000 Subject: [PATCH 054/248] Added HA25 --- .../ha_15_32/ha_analysis_batch_3.py | 79 ++++++++++++------- 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index b5c6835b..baaa4050 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -159,19 +159,18 @@ class DataLoader: } UNMATCHED_CIGA = { - # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not - # the asset list + "HA6": 117, "HA14": 3, "HA16": 7, - # There's just too many unmatched here - "HA6": 117, + "HA24": 12, "HA107": 51, } - def __init__(self, directories, december_figures_filepath, use_cache): + def __init__(self, directories, december_figures_filepath, use_cache, rebuild): self.directories = directories self.use_cache = use_cache self.december_figures_filepath = december_figures_filepath + self.rebuild = rebuild self.data = {} self.december_figures = None @@ -312,23 +311,20 @@ class DataLoader: return asset_list @staticmethod - def create_ciga_list_house_no(ha_name, ciga_list): + def create_ciga_list_house_no(ciga_list): """ This function will append the House number onto the asset list :return: """ - if ha_name in ["HA6", "HA14", "HA107", "HA16"]: - split_addresses = ciga_list['Matched Address'].str.split(',', expand=True) - house_numbers = split_addresses[0].str.split(' ', expand=True) - # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how - # many columns there might be - house_numbers = house_numbers.iloc[:, 0:1] - house_numbers.columns = ['HouseNo'] + split_addresses = ciga_list['Matched Address'].str.split(',', expand=True) + house_numbers = split_addresses[0].str.split(' ', expand=True) + # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how + # many columns there might be + house_numbers = house_numbers.iloc[:, 0:1] + house_numbers.columns = ['HouseNo'] - ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1) - else: - raise NotImplementedError("Implement me") + ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1) return ciga_list @@ -447,7 +443,7 @@ class DataLoader: # Remove rows with missing postcode which happens in a small number of cases ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])] ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))] - ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) + ciga_list = self.create_ciga_list_house_no(ciga_list) ciga_list = self.dedupe_ciga_list(ciga_list) ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) @@ -800,6 +796,10 @@ class DataLoader: "st. leodegars close", "st leodegars close" ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "montgomery crescent", "montgomery road" + ) + return survey_list @staticmethod @@ -1102,16 +1102,18 @@ class DataLoader: for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]: self.december_figures[col] = self.december_figures[col].astype("Int64") - if self.use_cache: - self.data = read_pickle_from_s3( + if self.use_cache and not self.rebuild: + data = read_pickle_from_s3( bucket_name="retrofit-datalake-dev", s3_file_name="ha-analysis/batch3-inputs.pickle", ) - return + else: + data = {} - data = {} for filepath in self.directories: ha_name = filepath.split("/")[2] + if ha_name in data: + continue # Load asset list logger.info("Loading data for {}".format(ha_name)) asset_list, survey_list, ciga_list = self.load_asset_list( @@ -2635,6 +2637,10 @@ def forecast_remaining_sales(loader): # and I don't want the numbers to change too much, depenent on the CIGA conversation rate maximum_ciga_conversion = 0.75 + # This is a hard limit to the allowed conversion rates to final sale. These are typically very + # high but there are some anomalies, amongst surveys that are early on + sales_conversion_lower_bound = 0.8 + gbis_rate = 600 eco4_rate = 1710 # old_gbis_rate = 432 @@ -2796,14 +2802,30 @@ def forecast_remaining_sales(loader): eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates) gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates) + eco4_ciga_independent_passrates["conversion"] = ( + eco4_ciga_independent_passrates["# ECO4 successfully installed"] / + eco4_ciga_independent_passrates["# ECO4 at install stage"] + ) + eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[ + eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound + ] + + gbis_ciga_independent_passrates["conversion"] = ( + gbis_ciga_independent_passrates["# GBIS successfully installed"] / + gbis_ciga_independent_passrates["# GBIS at install stage"] + ) + gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[ + gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound + ] + median_eco4_to_install = ( - eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() / - eco4_ciga_independent_passrates["# ECO4 at install stage"].sum() + eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() / + eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum() ) median_gbis_to_install = ( - gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() / - gbis_ciga_independent_passrates["# GBIS at install stage"].sum() + gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() / + gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum() ) # Produce the final output @@ -3270,6 +3292,8 @@ def app(): use_cache = True # Determines if we want to perform the data pull pull_data = False + # Override to re-build all inputs + rebuild_inputs = False # List all of the data in the folder @@ -3278,12 +3302,11 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - # priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"] - priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"] + priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"] # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] - loader = DataLoader(directories, december_figures_filepath, use_cache) + loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs) loader.load() loader.ha_facts_and_figures() From cbd4a0052ef005e00ce143c16306b5f0b782c4ed Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Mar 2024 23:52:19 +0000 Subject: [PATCH 055/248] Starting HA25 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index baaa4050..0c9f685f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -347,6 +347,8 @@ class DataLoader: return "Asset" elif "Decent Homes Stock" in workbook.sheetnames: return "Decent Homes Stock" + elif "Report" in workbook.sheetnames: + return "Report" else: return "Assets" From fc022b8a22d571651ba21fff9fd4c5901b18e20f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 2 Mar 2024 12:34:22 +0000 Subject: [PATCH 056/248] Added data load for HA25 --- .../ha_15_32/ha_analysis_batch_3.py | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 0c9f685f..4ae881d2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -155,6 +155,10 @@ class DataLoader: "HA24": { "address": "Address", "postcode": "Postcode" + }, + "HA25": { + "address": "T1_Address", + "postcode": "matching_postcode" } } @@ -178,7 +182,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA16", "HA24"]: + if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -374,13 +378,23 @@ class DataLoader: asset_sheetname = self.get_asset_sheetname(workbook) asset_sheet = workbook[asset_sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] + if ha_name == "HA25": + asset_sheet_colnames[11] = "matching_postcode" + + values_only = not ha_name != "HA25" rows_data = [] - for row in asset_sheet.iter_rows(min_row=2, values_only=False): - row_data = [cell.value for cell in row] # This will get you the cell values - rows_data.append(row_data) + if not values_only: + for row in asset_sheet.iter_rows(min_row=2, values_only=values_only): + row_data = [cell.value for cell in row] # This will get you the cell values + rows_data.append(row_data) + else: + for row in asset_sheet.iter_rows(min_row=2, values_only=values_only): # use values_only=True to get values + row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values + rows_data.append(row_data) asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames) + asset_list = asset_list.loc[:, asset_list.columns.notnull()] # Remove entirely empty rows - consider all rows apart from row_color @@ -403,9 +417,10 @@ class DataLoader: asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list") asset_list = asset_list_correction_function(asset_list) - # For HA1, there is an exception in the structure of the data. We don't have any survey or ciga lists, and so + # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga + # lists, and so # we can return the asset list now - if ha_name == "HA1": + if ha_name in ["HA1", "HA25"]: return asset_list, pd.DataFrame(), pd.DataFrame() # We check if there is a survey list @@ -1149,7 +1164,8 @@ class DataLoader: "ECO4": "ECO4", "AFFORDABLE WARMTH": "ECO4", "ECO4 A/W": "ECO4", - "ECO4 GBIS (ECO+)": "GBIS" + "ECO4 GBIS (ECO+)": "GBIS", + "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS" } eco_eligibility_map = { @@ -3305,6 +3321,8 @@ def app(): december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"] + # Next HAs to do: 15, 32, 33, + # Then: 28, 41, 38, 10, 14, 20, 48 # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From 9a69f8741ece9fdb740cb1b9855f53e639637f44 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 2 Mar 2024 12:54:19 +0000 Subject: [PATCH 057/248] adding HA15 --- .../ha_15_32/ha_analysis_batch_3.py | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 4ae881d2..81ed2301 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -165,6 +165,7 @@ class DataLoader: UNMATCHED_CIGA = { "HA6": 117, "HA14": 3, + "HA15": 3, "HA16": 7, "HA24": 12, "HA107": 51, @@ -204,7 +205,15 @@ class DataLoader: asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() - + elif ha_name == "HA15": + asset_list["matching_address"] = ( + asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + + asset_list["Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA39": # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -502,6 +511,15 @@ class DataLoader: return asset_list + @staticmethod + def correct_ha15_asset_list(asset_list): + asset_list["matching_postcode"] = np.where( + asset_list["Address Line 1"] == "103 Priory Crescent", + "hp19 9ny", + asset_list["matching_postcode"] + ) + return asset_list + @staticmethod def correct_ha6_survey_list(survey_list): @@ -655,6 +673,14 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha15_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Mary Mac Manus Drive, Milton Keynes", "Mary Mac Manus Drive" + ) + + return survey_list + @staticmethod def correct_ha16_survey_list(survey_list): survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ") @@ -3320,7 +3346,9 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"] + priority_has = [ + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA39", "HA107" + ] # Next HAs to do: 15, 32, 33, # Then: 28, 41, 38, 10, 14, 20, 48 # Filter down the directories to only the priority HAs From dad2fc74c889112cbed0a67578fb013e21b276f9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 2 Mar 2024 13:10:28 +0000 Subject: [PATCH 058/248] HA15 checked and added --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 81ed2301..1ae05d16 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1191,12 +1191,15 @@ class DataLoader: "AFFORDABLE WARMTH": "ECO4", "ECO4 A/W": "ECO4", "ECO4 GBIS (ECO+)": "GBIS", - "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS" + "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS", + "ECO4 AFFORDABLE WARMTH": "ECO4" } eco_eligibility_map = { "not eligble": "not eligible", "eco 4(subject to ciga)": "eco4 (subject to ciga)", + "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)", + "eco4 (subject to archetype check)": "eco4" } ha_facts_and_figures = [] From 9eccfca70dda75ac1c49084bcd63ec3734e3dd23 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 2 Mar 2024 13:26:54 +0000 Subject: [PATCH 059/248] fixing merge --- .../ha_15_32/ha_analysis_batch_3.py | 67 ++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1ae05d16..1f99d23c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -214,6 +214,13 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA32": + asset_list["matching_address"] = ( + asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " + + asset_list["Street"].astype(str).str.lower().str.strip() + ", " + + asset_list["Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA39": # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -308,6 +315,8 @@ class DataLoader: if ha_name in ["HA107"]: asset_list["HouseNo"] = asset_list["House No"].copy() + elif ha_name == "HA32": + asset_list["HouseNo"] = asset_list["Dwelling num"].copy() else: split_addresses = asset_list['matching_address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) @@ -520,6 +529,16 @@ class DataLoader: ) return asset_list + @staticmethod + def correct_ha32_asset_list(asset_list): + asset_list["Postcode"] = np.where( + (asset_list["Street"] == "Norton Grove") & (asset_list["Postcode"] == "HU4 6HQ") & ( + asset_list["Dwelling num"] == "7"), + "hu4 6hg", + asset_list["Postcode"] + ) + return asset_list + @staticmethod def correct_ha6_survey_list(survey_list): @@ -845,6 +864,50 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha32_survey_list(survey_list): + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "Coxwold", + "Coxwold Grove", + survey_list["Street / Block Name"] + ) + + # Update the Barringhton Avenue with their correct spelling: Barrington Avenue + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "Barringhton Avenue", + "Barrington Avenue", + survey_list["Street / Block Name"] + ) + + # Update how the Rustenburn addresses are listed in the identified addresses + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "Rustenburg", + "Rustenburg Street", + survey_list["Street / Block Name"] + ) + + # Update how the MALIN LODGE, RONALDSWAY CLOSE addresses are listed in the identified addresses + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "MALIN LODGE, RONALDSWAY CLOSE", + "Malin Lodge", + survey_list["Street / Block Name"] + ) + + # Update how the Feroes Close are listed in the identified addresses + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "Feroes Close", + "Faroes Close", + survey_list["Street / Block Name"] + ) + + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == 'FORESTER WAY', + 'FORESTER WAY', + survey_list["Street / Block Name"] + ) + + return survey_list + @staticmethod def correct_ha107_survey_list(survey_list): # Replace Front Street, East Stockham with Front Street, East Stockwith @@ -3350,9 +3413,9 @@ def app(): december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA39", "HA107" + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA32", "HA39", "HA107" ] - # Next HAs to do: 15, 32, 33, + # Next HAs to do: 15[DONE], 32, 33, # Then: 28, 41, 38, 10, 14, 20, 48 # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From 2828b005cbb3676216827fcb5dc70630f8ecb393 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 Mar 2024 15:06:31 +0000 Subject: [PATCH 060/248] fixing HA32 merge --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1f99d23c..c84a2c5c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -906,6 +906,19 @@ class DataLoader: survey_list["Street / Block Name"] ) + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == '6 Zeigfeld', + 'Ziegfeld Court', + survey_list["Street / Block Name"] + ) + + # Malin Lodge, Ronaldsway Close + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == 'Malin Lodge, Ronaldsway Close', + 'Malin Lodge', + survey_list["Street / Block Name"] + ) + return survey_list @staticmethod From 811f141c45b1fcfa52c9f1d685690389df55f531 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 Mar 2024 15:35:49 +0000 Subject: [PATCH 061/248] started working on ha33 but paused --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index c84a2c5c..9bd04884 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -221,6 +221,12 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA33": + asset_list["matching_address"] = ( + asset_list["ADDRESS"].astype(str).str.lower().str.strip() + ", " + + asset_list["POST CODE"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip() elif ha_name == "HA39": # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -3426,9 +3432,9 @@ def app(): december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA32", "HA39", "HA107" + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107", ] - # Next HAs to do: 15[DONE], 32, 33, + # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], # Then: 28, 41, 38, 10, 14, 20, 48 # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From cb39590f618e7c6ff382e76cc461792101a9741a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 Mar 2024 15:48:05 +0000 Subject: [PATCH 062/248] debugging matching for HA28 --- .../ha_15_32/ha_analysis_batch_3.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 9bd04884..7481724b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -214,6 +214,13 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA28": + asset_list["matching_address"] = ( + asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + + asset_list["Street 1"].astype(str).str.lower().str.strip() + ", " + + asset_list["Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA32": asset_list["matching_address"] = ( asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " + @@ -323,6 +330,8 @@ class DataLoader: asset_list["HouseNo"] = asset_list["House No"].copy() elif ha_name == "HA32": asset_list["HouseNo"] = asset_list["Dwelling num"].copy() + elif ha_name == "HA28": + asset_list["HouseNo"] = asset_list["House Number"].copy() else: split_addresses = asset_list['matching_address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) @@ -371,6 +380,8 @@ class DataLoader: def get_asset_sheetname(workbook): if "Asset List" in workbook.sheetnames: return "Asset List" + elif "Asset list" in workbook.sheetnames: + return "Asset list" elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames: return "Asset" elif "Decent Homes Stock" in workbook.sheetnames: @@ -394,6 +405,8 @@ class DataLoader: def get_survey_sheetname(workbook): if "ECO Surveys" in workbook.sheetnames: return "ECO Surveys" + elif "ECO Survey" in workbook.sheetnames: + return "ECO Survey" else: return "ECO surveys" @@ -870,6 +883,12 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha28_survey_list(survey_list): + # Rename the "No" column to "No." to align with the other survey sheets + survey_list = survey_list.rename(columns={"NO ": "NO."}) + return survey_list + @staticmethod def correct_ha32_survey_list(survey_list): survey_list["Street / Block Name"] = np.where( @@ -1027,6 +1046,10 @@ class DataLoader: asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip()) ].copy() + if str(house_number) not in df["matching_address"].values: + if "flat" in str(house_number): + house_number = house_number.split("flat")[1].strip() + df = df[df["matching_address"].str.contains(str(house_number))] if df.empty: From 0909b811ee7aea834784f0deb947308593ce7cdd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 Mar 2024 15:57:49 +0000 Subject: [PATCH 063/248] fixed matching for ha28 --- .../ha_15_32/ha_analysis_batch_3.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7481724b..b954a651 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -887,6 +887,27 @@ class DataLoader: def correct_ha28_survey_list(survey_list): # Rename the "No" column to "No." to align with the other survey sheets survey_list = survey_list.rename(columns={"NO ": "NO."}) + + survey_list["Post Code"] = np.where( + survey_list["Post Code"] == "ME75HA", + "ME7 5HA", + survey_list["Post Code"] + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ANDREW MANOR/BRITTON ST", "ANDREW MANOR" + ) + + survey_list["Post Code"] = np.where( + survey_list["Post Code"] == "ME75TW", + "ME7 5TW", + survey_list["Post Code"] + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ST MARKS HOUSE/SAXON ST", "ST MARKS HOUSE" + ) + return survey_list @staticmethod @@ -1046,7 +1067,7 @@ class DataLoader: asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip()) ].copy() - if str(house_number) not in df["matching_address"].values: + if not any(df["matching_address"].str.contains(str(house_number))): if "flat" in str(house_number): house_number = house_number.split("flat")[1].strip() From 87c77e53c03ec83286718d6ef6bb5593466a48b1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 Mar 2024 16:22:42 +0000 Subject: [PATCH 064/248] handing facts and figures for ha28 --- .../ha_15_32/ha_analysis_batch_3.py | 92 +++++++++++-------- 1 file changed, 53 insertions(+), 39 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index b954a651..3ded09ba 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -398,6 +398,8 @@ class DataLoader: return "CIGA Checks" elif "CIGA checks" in workbook.sheetnames: return "CIGA checks" + elif "CIGA check" in workbook.sheetnames: + return "CIGA check" else: return "CIGA" @@ -1318,14 +1320,16 @@ class DataLoader: "ECO4 A/W": "ECO4", "ECO4 GBIS (ECO+)": "GBIS", "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS", - "ECO4 AFFORDABLE WARMTH": "ECO4" + "ECO4 AFFORDABLE WARMTH": "ECO4", + "Affordable Warmth": "ECO4" } eco_eligibility_map = { "not eligble": "not eligible", "eco 4(subject to ciga)": "eco4 (subject to ciga)", "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)", - "eco4 (subject to archetype check)": "eco4" + "eco4 (subject to archetype check)": "eco4", + "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)", } ha_facts_and_figures = [] @@ -1384,46 +1388,56 @@ class DataLoader: sales_report = {} if not survey_list.empty: scheme_column = survey_list.columns[0] - # We clean up the survey list installation or cancelled - survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower() - # Remove all punctuation - survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( - r'[^\w\s]', '', regex=True - ) - # Remove double spaces - survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace( - r'\s+', ' ', regex=True - ) - # Remove trailing spaces - survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() - # Remap the values in the scheme column survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map) + # We clean up the survey list installation or cancelled + if "INSTALLED OR CANCELLED" in survey_list.columns: + survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower() + # Remove all punctuation + survey_list["installed_or_cancelled_clean"] = survey_list[ + "installed_or_cancelled_clean"].str.replace( + r'[^\w\s]', '', regex=True + ) + # Remove double spaces + survey_list["installed_or_cancelled_clean"] = survey_list[ + "installed_or_cancelled_clean"].str.replace( + r'\s+', ' ', regex=True + ) + # Remove trailing spaces + survey_list["installed_or_cancelled_clean"] = survey_list[ + "installed_or_cancelled_clean"].str.strip() - survey_list["installation_status"] = None - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]), - "installed", - survey_list["installation_status"] - ) - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].isin(["cancelled"]), - "cancelled", - survey_list["installation_status"] - ) - # Find partial installations - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), - "partially installed", - survey_list["installation_status"] - ) - # Find partial cancellations - # TODO: We might have more indications of partial cancellations - survey_list["installation_status"] = np.where( - survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), - "partially cancelled", - survey_list["installation_status"] - ) + survey_list["installation_status"] = None + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]), + "installed", + survey_list["installation_status"] + ) + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["cancelled"]), + "cancelled", + survey_list["installation_status"] + ) + # Find partial installations + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), + "partially installed", + survey_list["installation_status"] + ) + # Find partial cancellations + # TODO: We might have more indications of partial cancellations + survey_list["installation_status"] = np.where( + survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), + "partially cancelled", + survey_list["installation_status"] + ) + else: + # We have some examples, e.g. HA28, where we do not have the installed or cancelled column + survey_list["installation_status"] = np.where( + survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"), + "cancelled", + "installed", + ) # Finally, for other cases, we set the status to "in progress" survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress") From f8948ff60f9e00d9501bd2f71f4269152cf3ab51 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 Mar 2024 16:47:10 +0000 Subject: [PATCH 065/248] ha38 wip: --- .../ha_15_32/ha_analysis_batch_3.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3ded09ba..4af7d9b9 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -234,6 +234,13 @@ class DataLoader: asset_list["POST CODE"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip() + elif ha_name == "HA38": + asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address_Line_2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address_Line_3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA39": # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -332,6 +339,8 @@ class DataLoader: asset_list["HouseNo"] = asset_list["Dwelling num"].copy() elif ha_name == "HA28": asset_list["HouseNo"] = asset_list["House Number"].copy() + elif ha_name == "HA38": + asset_list["HouseNo"] = asset_list["House_Number"].copy() else: split_addresses = asset_list['matching_address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) @@ -912,6 +921,12 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha38_survey_list(survey_list): + # Rename the "No" column to "No." to align with the other survey sheets + survey_list = survey_list.rename(columns={"NO ": "NO."}) + return survey_list + @staticmethod def correct_ha32_survey_list(survey_list): survey_list["Street / Block Name"] = np.where( @@ -3490,10 +3505,11 @@ def app(): december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107", ] # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], - # Then: 28, 41, 38, 10, 14, 20, 48 + # Then: 28 [DONE], + # 38, 41, 10, 14, 20, 48 # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From abe0e627dbe1c89209de2f867c2abe4eef419d2e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 10:24:42 +0000 Subject: [PATCH 066/248] Fixing bug with gbis remaining counts --- .../ha_15_32/ha_analysis_batch_3.py | 266 ++++++++++++------ 1 file changed, 184 insertions(+), 82 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 4af7d9b9..6d1a3b45 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -424,6 +424,12 @@ class DataLoader: def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) asset_sheetname = self.get_asset_sheetname(workbook) + + # TODO: TEMP + sheetnames_lower = [x.lower() for x in workbook.sheetnames] + if any("eco3" in x for x in sheetnames_lower): + raise Exception("REMOVE ME") + asset_sheet = workbook[asset_sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] if ha_name == "HA25": @@ -569,6 +575,34 @@ class DataLoader: ) return asset_list + @staticmethod + def correct_ha38_asset_list(asset_list): + # For Kingsford court, the house number is at the end of the address + def rearrange_address_if_flat(address): + if '/flat' in address.lower(): + parts = address.split('/flat', 1) + return f"FLAT{parts[1]}, {parts[0]}" + return address + + def extract_house_no_if_flat(address): + if '/flat' in address.lower(): + # Attempt to extract the house number following "/flat" + try: + house_no = address.split('/flat ')[1].split(' ')[0] + # Remove trailing comma + house_no = house_no.replace(",", "") + except IndexError: + house_no = None + return house_no + return None + + asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat) + asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo'] + asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat) + # We then need to + + return asset_list + @staticmethod def correct_ha6_survey_list(survey_list): @@ -925,6 +959,11 @@ class DataLoader: def correct_ha38_survey_list(survey_list): # Rename the "No" column to "No." to align with the other survey sheets survey_list = survey_list.rename(columns={"NO ": "NO."}) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + 'Kingsford Court, Coombe Valley Road', 'Kingsford Court' + ) + return survey_list @staticmethod @@ -1345,6 +1384,7 @@ class DataLoader: "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)", "eco4 (subject to archetype check)": "eco4", "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)", + "eco4 (subject to ciga)": "eco4 (subject to ciga)" } ha_facts_and_figures = [] @@ -2943,8 +2983,8 @@ def forecast_remaining_sales(loader): median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum() # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install - eco4_ciga_independent_passrates = [] - gbis_ciga_independent_passrates = [] + eco4_ciga_independent_to_install = [] + gbis_to_install = [] for ha_name, input_data in loader.data.items(): asset_list = input_data["asset_list"].copy() survey_list = input_data["survey_list"].copy() @@ -2973,7 +3013,7 @@ def forecast_remaining_sales(loader): ) ] - eco4_ciga_independent_passrates.append( + eco4_ciga_independent_to_install.append( { "Ha Name": ha_name, "# ECO4 at install stage": typical_eco4_installed.shape[0], @@ -2993,7 +3033,7 @@ def forecast_remaining_sales(loader): ) ] - gbis_ciga_independent_passrates.append( + gbis_to_install.append( { "Ha Name": ha_name, "# GBIS at install stage": typical_gbis_installed.shape[0], @@ -3001,33 +3041,33 @@ def forecast_remaining_sales(loader): } ) - eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates) - gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates) + eco4_ciga_independent_to_install = pd.DataFrame(eco4_ciga_independent_to_install) + gbis_to_install = pd.DataFrame(gbis_to_install) - eco4_ciga_independent_passrates["conversion"] = ( - eco4_ciga_independent_passrates["# ECO4 successfully installed"] / - eco4_ciga_independent_passrates["# ECO4 at install stage"] + eco4_ciga_independent_to_install["conversion"] = ( + eco4_ciga_independent_to_install["# ECO4 successfully installed"] / + eco4_ciga_independent_to_install["# ECO4 at install stage"] ) - eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[ - eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound + eco4_ciga_independent_to_install_clipped = eco4_ciga_independent_to_install[ + eco4_ciga_independent_to_install["conversion"] >= sales_conversion_lower_bound ] - gbis_ciga_independent_passrates["conversion"] = ( - gbis_ciga_independent_passrates["# GBIS successfully installed"] / - gbis_ciga_independent_passrates["# GBIS at install stage"] + gbis_to_install["conversion"] = ( + gbis_to_install["# GBIS successfully installed"] / + gbis_to_install["# GBIS at install stage"] ) - gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[ - gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound + gbis_to_install_clipped = gbis_to_install[ + gbis_to_install["conversion"] >= sales_conversion_lower_bound ] median_eco4_to_install = ( - eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() / - eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum() + eco4_ciga_independent_to_install_clipped["# ECO4 successfully installed"].sum() / + eco4_ciga_independent_to_install_clipped["# ECO4 at install stage"].sum() ) median_gbis_to_install = ( - gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() / - gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum() + gbis_to_install_clipped["# GBIS successfully installed"].sum() / + gbis_to_install_clipped["# GBIS at install stage"].sum() ) # Produce the final output @@ -3044,29 +3084,26 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): - # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0] original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0] + original_warmfront_sold_eco4 = ( + original_warmfront_estimates["No. of Tech surveys complete - Eco 4"].values[0] * eco4_rate + ) - # original_warmfront_eco4_revenue = ( - # original_warmfront_remaining_eco4 * eco4_rate + - # (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate - # ) original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate + original_warmfront_sold_gbis = ( + original_warmfront_estimates["No. of Tech surveys complete - GBIS"].values[0] * gbis_rate + ) # Original warmfront figures - GBIS original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0] original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0] - # original_warmfront_gbis_revenue = ( - # original_warmfront_remaining_gbis * gbis_rate + - # (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate - # ) original_warmfront_gbis_revenue = ( original_warmfront_gbis * gbis_rate ) @@ -3123,7 +3160,7 @@ def forecast_remaining_sales(loader): # We also need the ha ciga passed to install success rate ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name] - if not ha_ciga_pass_to_sale.empty: + if not ha_ciga_pass_to_sale.empty and ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] != 0: ha_ciga_pass_to_sale_rate = ( ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] / ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] @@ -3131,7 +3168,9 @@ def forecast_remaining_sales(loader): else: ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install - ha_eco4_to_sale = eco4_ciga_independent_passrates[eco4_ciga_independent_passrates["Ha Name"] == ha_name] + ha_eco4_to_sale = eco4_ciga_independent_to_install_clipped[ + eco4_ciga_independent_to_install_clipped["Ha Name"] == ha_name + ] if not ha_eco4_to_sale.empty: ha_eco4_to_sale_rate = ( ha_eco4_to_sale['# ECO4 successfully installed'].values[0] / @@ -3149,12 +3188,6 @@ def forecast_remaining_sales(loader): eco4_rate=eco4_rate ) - # Calculate the delta compared to Warmfront's original estimate - eco4_delta_vs_original_estimate = ( - eco4_post_ciga_total_results[ - "ECO4 - post CIGA - #"] - original_warmfront_eco4 - ) / original_warmfront_eco4 - eco4_post_ciga_remaining_results = calculate_eco4_post_ciga( eligiblity_counts=eligiblity_counts_remaining, input_data=input_data, @@ -3164,10 +3197,18 @@ def forecast_remaining_sales(loader): eco4_rate=eco4_rate ) + # Calculate the delta compared to Warmfront's original remaining + if original_warmfront_remaining_eco4 == 0: + eco4_delta_vs_original_estimate_remaining = eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] + else: + eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] - + original_warmfront_remaining_eco4) / + original_warmfront_remaining_eco4) + # GBIS Figures # Estimate the GBIS conversion rate - ha_gbis_sale_conversion = gbis_ciga_independent_passrates[ - gbis_ciga_independent_passrates["Ha Name"] == ha_name + ha_gbis_sale_conversion = gbis_to_install_clipped[ + gbis_to_install_clipped["Ha Name"] == ha_name ] if not ha_gbis_sale_conversion.empty: @@ -3178,6 +3219,9 @@ def forecast_remaining_sales(loader): else: ha_gbis_sale_conversion = median_gbis_to_install + asset_list["ECO Eligibility"].value_counts() + asset_list_remaining["ECO Eligibility"].value_counts() + gbis_total = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "gbis" ]["count"].sum() @@ -3185,18 +3229,59 @@ def forecast_remaining_sales(loader): gbis_total_revenue = int(gbis_total * gbis_rate) gbis_remaining = eligiblity_counts_remaining[ - eligiblity_counts["ECO Eligibility"] == "gbis" + eligiblity_counts_remaining["ECO Eligibility"] == "gbis" ]["count"].sum() gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion)) gbis_remaining_revenue = int(gbis_remaining * gbis_rate) # GBIS delta - if original_warmfront_gbis == 0: - gbis_delta_vs_original_estimate = gbis_total + if original_warmfront_remaining_gbis == 0: + gbis_delta_vs_original_estimate_remaining = gbis_remaining else: - gbis_delta_vs_original_estimate = ( - gbis_total - original_warmfront_gbis - ) / original_warmfront_gbis + gbis_delta_vs_original_estimate_remaining = ( + (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis + ) + + # Current sales figures + # For any sales surveys that are complete, that could still cancel, we apply a conversion rate + eco4_actually_sold = 0 + gbis_actually_sold = 0 + if not survey_list.empty: + surveys_with_eligibility = survey_list.merge( + asset_list[["asset_list_row_id", "ECO Eligibility"]], + how="left", on="asset_list_row_id" + ) + completed_eco4_sales = surveys_with_eligibility[ + surveys_with_eligibility["installation_status"] == "ECO4 - installed" + ] + incomplete_eco4_sales = surveys_with_eligibility[ + (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") & + (~surveys_with_eligibility["ECO Eligibility"].isin( + ["eco4 - passed ciga"]) + ) + ] + incomplete_eco4_sales_ciga = surveys_with_eligibility[ + (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") & + (surveys_with_eligibility["ECO Eligibility"].isin( + ["eco4 - passed ciga"]) + ) + ] + + eco4_actually_sold = (completed_eco4_sales.shape[0] * eco4_rate) + ( + incomplete_eco4_sales.shape[0] * ha_eco4_to_sale_rate + + incomplete_eco4_sales_ciga.shape[0] * ha_ciga_pass_to_sale_rate + ) * eco4_rate + + completed_gbis_sales = surveys_with_eligibility[ + surveys_with_eligibility["installation_status"] == "GBIS - installed" + ] + incomplete_gbis_sales = surveys_with_eligibility[ + (surveys_with_eligibility["installation_status"] == "GBIS - in progress") + ] + + gbis_actually_sold = completed_gbis_sales.shape[0] * gbis_rate + ( + incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate + ) to_append = { ("", "", "", "HA Name"): ha_name, @@ -3204,29 +3289,33 @@ def forecast_remaining_sales(loader): ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4, ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue, + ("ECO4 original", "", "Sold - £", ""): original_warmfront_sold_eco4, ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, # GBIS - original warmfront figures ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis, ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis, ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue, + ("GBIS original", "", "Sold - £", ""): original_warmfront_sold_gbis, ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue, # ECO4 - asset list, pre-ciga ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, + ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, # ECO4 - asset list, post ciga, total - ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)"): + ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"): eco4_post_ciga_total_results[ "ECO4 - post CIGA - #"], ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[ "ECO4 - post CIGA - £"], - ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate, # ECO4 - asset list, post ciga, remaining ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[ "ECO4 - post CIGA - #"], ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[ "ECO4 - post CIGA - £"], + ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", + ""): eco4_delta_vs_original_estimate_remaining, ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""): eco4_post_ciga_remaining_results["Of which confirmed - #"], ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""): @@ -3257,13 +3346,15 @@ def forecast_remaining_sales(loader): # GBIS postcode list ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, - ("GBIS Postcode list", "", "Delta vs original estimate - %", ""): gbis_delta_vs_original_estimate, + ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold, ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, + ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""): + gbis_delta_vs_original_estimate_remaining, } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 33: + if len(to_append) != 37: raise ValueError("Something went wrong") results.append(to_append) @@ -3275,26 +3366,26 @@ def forecast_remaining_sales(loader): if col == ('', '', '', 'HA Name'): totals_row[col] = "Total" elif col in [ - ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""), - ("GBIS Postcode list", "", "Delta vs original estimate - %", "") + ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", ""), + ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "") ]: totals_row[col] = None else: totals_row[col] = results[col].sum() # For the delta columns, we calculate the delta on the totals - totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = ( + totals_row[("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", "")] = ( ( - totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] - - totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")] - ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")] + totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")] - + totals_row[("ECO4 original", "", "Remaining - #", "")] + ) / totals_row[("ECO4 original", "", "Remaining - #", "")] ) - totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = ( + totals_row[("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")] = ( ( - totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] - - totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")] - ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")] + totals_row[("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")] - + totals_row[("GBIS original", "", "Remaining - #", "")] + ) / totals_row[("GBIS original", "", "Remaining - #", "")] ) blank_row = pd.DataFrame([{col: "" for col in results.columns}]) @@ -3342,6 +3433,15 @@ def forecast_remaining_sales(loader): ) headline_total_delta = round(headline_total_delta, 1) + headline_eco4_sold_since_november = ( + totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] - totals_row[('ECO4 original', '', 'Sold - £', '')] + ) + + headline_gbis_sold_since_november = ( + totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] - + totals_row[('GBIS original', '', 'Sold - £', '')] + ) + headlines = [ { ("", "", "", "HA Name"): "Headlines", @@ -3358,16 +3458,22 @@ def forecast_remaining_sales(loader): "ECO4 - November"): headline_eco4_original_remaining_revenue }, { - ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #", + ("", "", "", "HA Name"): "ECO4 Sold since November - £", + ( + "", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_eco4_sold_since_november + }, + { + ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - #", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining }, { - ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £", + ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - £", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue }, { - ("", "", "", "HA Name"): "ECO4 delta %", + ("", "", "", "HA Name"): "ECO4 £ remaining delta - %", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%" }, { @@ -3380,6 +3486,12 @@ def forecast_remaining_sales(loader): "", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_original_remaining_revenue }, + { + ("", "", "", "HA Name"): "GBIS Sold since November - £", + ( + "", "Original Warmfront estimate", "Total - #", + "ECO4 - November"): headline_gbis_sold_since_november + }, { ("", "", "", "HA Name"): "GBIS Remaining - post code list - #", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining @@ -3400,7 +3512,7 @@ def forecast_remaining_sales(loader): "ECO4 - November"): headline_original_total_revenue_remaining }, { - ("", "", "", "HA Name"): "Total Remaining - post code list - £", + ("", "", "", "HA Name"): "Total Remaining - post code list (post CIGA) - £", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_postcode_list_total_revenue_remaining }, @@ -3440,14 +3552,16 @@ def forecast_remaining_sales(loader): ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( round(median_eco4_to_install * 100, 1)) + "%", ("ECO4 original", "", "Remaining - #", - ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel" + ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Surveys that resulted " + "in cancelled install are excluded." }, { ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate", ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str( round(median_ciga_pass_to_install * 100, 1)) + "%", ("ECO4 original", "", "Remaining - #", - ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel" + ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Surveys that resulted in " + "cancelled installs are excluded." } ] @@ -3462,23 +3576,7 @@ def forecast_remaining_sales(loader): pd.DataFrame(assumptions) ] ) - - # header_rows = [ - # [name[0] for name in results.columns.values], - # [name[1] for name in results.columns.values], - # [name[2] for name in results.columns.values], - # [name[3] for name in results.columns.values] - # ] - - # Step 2: Write the transformed header and DataFrame data to CSV. - # Open the file in write mode. - import csv with open("HA Remaining Analysis.csv", "w", newline="") as file: - # writer = csv.writer(file) - - # Write the header rows. - # writer.writerows(header_rows) - # Write the DataFrame data without the index (adjust if you want the index). results.to_csv(file, header=True, index=False) @@ -3504,8 +3602,12 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" + # priority_has = [ + # "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107", + # ] + # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA39", "HA107", ] # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], # Then: 28 [DONE], From 5b32ac8aad59b1942f80a399d072486ab6db9ec3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 10:59:07 +0000 Subject: [PATCH 067/248] handling case where property is marked as gbis but sold for ECO4 --- .../ha_15_32/ha_analysis_batch_3.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 6d1a3b45..7bfbd7f5 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1509,11 +1509,12 @@ class DataLoader: } # We find some cases where properties have sold but are missing CIGA checks - survey_list_to_merge = survey_list[["asset_list_row_id"]].copy() + survey_list_to_merge = survey_list[["asset_list_row_id", "installation_status"]].copy() survey_list_to_merge["has_a_survey_record"] = True survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])] asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id") + # Update the cases where properties have sold, but are missing a CIGA check asset_list["ECO Eligibility"] = np.where( (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & ( asset_list["has_a_survey_record"] == True @@ -1521,6 +1522,17 @@ class DataLoader: "eco4 - passed ciga", asset_list["ECO Eligibility"] ) + # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4 + asset_list["ECO Eligibility"] = np.where( + (asset_list["ECO Eligibility"] == "gbis") & ( + asset_list["installation_status"].isin( + ["ECO4 - installed", "ECO4 - cancelled"] + ) + ), + "eco4", + asset_list["ECO Eligibility"] + ) + asset_list = asset_list.drop(columns=["has_a_survey_record"]) # Update the survey list with installation status @@ -3199,7 +3211,7 @@ def forecast_remaining_sales(loader): # Calculate the delta compared to Warmfront's original remaining if original_warmfront_remaining_eco4 == 0: - eco4_delta_vs_original_estimate_remaining = eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] + eco4_delta_vs_original_estimate_remaining = "N/A" else: eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] - original_warmfront_remaining_eco4) / @@ -3219,9 +3231,6 @@ def forecast_remaining_sales(loader): else: ha_gbis_sale_conversion = median_gbis_to_install - asset_list["ECO Eligibility"].value_counts() - asset_list_remaining["ECO Eligibility"].value_counts() - gbis_total = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "gbis" ]["count"].sum() @@ -3236,7 +3245,7 @@ def forecast_remaining_sales(loader): # GBIS delta if original_warmfront_remaining_gbis == 0: - gbis_delta_vs_original_estimate_remaining = gbis_remaining + gbis_delta_vs_original_estimate_remaining = "N/A" else: gbis_delta_vs_original_estimate_remaining = ( (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis From 9d26c94ae571ce1ba5363e9c850b8017f110bc9d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 11:35:14 +0000 Subject: [PATCH 068/248] removed stray comma causing bugs --- .../ha_15_32/ha_analysis_batch_3.py | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7bfbd7f5..e58c7799 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1526,14 +1526,40 @@ class DataLoader: asset_list["ECO Eligibility"] = np.where( (asset_list["ECO Eligibility"] == "gbis") & ( asset_list["installation_status"].isin( - ["ECO4 - installed", "ECO4 - cancelled"] + ["ECO4 - installed", "ECO4 - cancelled", "ECO4 - in progress"] ) ), "eco4", asset_list["ECO Eligibility"] ) + # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS + asset_list["ECO Eligibility"] = np.where( + (asset_list["ECO Eligibility"].isin( + ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + )) & ( + asset_list["installation_status"].isin( + ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"] + ) + ), + "gbis", + asset_list["ECO Eligibility"] + ) + # Update the cases where a property is marked as not eligible, but sold for GBIS + if ((asset_list["ECO Eligibility"] == "not eligible") & ( + asset_list["installation_status"].isin( + ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"] + ))).sum(): + bah + asset_list["ECO Eligibility"] = np.where( + (asset_list["ECO Eligibility"] == "not eligible") & ( + asset_list["installation_status"].isin( + ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"] + )), + "gbis", + asset_list["ECO Eligibility"] + ) - asset_list = asset_list.drop(columns=["has_a_survey_record"]) + asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"]) # Update the survey list with installation status self.data[ha_name]["survey_list"] = survey_list @@ -2897,8 +2923,6 @@ def forecast_remaining_sales(loader): gbis_rate = 600 eco4_rate = 1710 - # old_gbis_rate = 432 - # old_eco4_rate = 1456 # 1) Calculate the conversion rate from passed CIGA to actual sale converted_ciga_jobs = [] From a70260f128aec2785a8000669dc981d8220505a3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 11:55:02 +0000 Subject: [PATCH 069/248] Update how we handle partially completed jobs --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e58c7799..060539e1 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1476,7 +1476,7 @@ class DataLoader: # Find partial installations survey_list["installation_status"] = np.where( survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"), - "partially installed", + "in progress", survey_list["installation_status"] ) # Find partial cancellations @@ -1550,6 +1550,7 @@ class DataLoader: ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"] ))).sum(): bah + asset_list["ECO Eligibility"] = np.where( (asset_list["ECO Eligibility"] == "not eligible") & ( asset_list["installation_status"].isin( @@ -1559,6 +1560,15 @@ class DataLoader: asset_list["ECO Eligibility"] ) + # Update the cases where a property is marked as not eligible, but sold for ECO4 + asset_list["ECO Eligibility"] = np.where( + (asset_list["ECO Eligibility"] == "not eligible") & ( + asset_list["installation_status"].isin( + ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"] + ) + ) + ) + asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"]) # Update the survey list with installation status From 4cc467e5142c7eba903d2819d59229643cf93e03 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 11:57:03 +0000 Subject: [PATCH 070/248] fix bug in updating eligibility for initially non-eligible rows --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 060539e1..8c03b1ef 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1545,12 +1545,6 @@ class DataLoader: asset_list["ECO Eligibility"] ) # Update the cases where a property is marked as not eligible, but sold for GBIS - if ((asset_list["ECO Eligibility"] == "not eligible") & ( - asset_list["installation_status"].isin( - ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"] - ))).sum(): - bah - asset_list["ECO Eligibility"] = np.where( (asset_list["ECO Eligibility"] == "not eligible") & ( asset_list["installation_status"].isin( @@ -1566,7 +1560,9 @@ class DataLoader: asset_list["installation_status"].isin( ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"] ) - ) + ), + "eco4", + asset_list["ECO Eligibility"] ) asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"]) From 5e991547f7239cf5a84f8e5824d4d9379b825a2a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 14:08:05 +0000 Subject: [PATCH 071/248] debuging variances, fixed usage of 75% ciga pass rate --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 8c03b1ef..91c198b1 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3196,8 +3196,8 @@ def forecast_remaining_sales(loader): ) else: ha_ciga_conversion_rate = ( - median_ciga_success_rate if median_ciga_success_rate <= median_ciga_success_rate else - median_ciga_success_rate + median_ciga_success_rate if median_ciga_success_rate <= maximum_ciga_conversion else + maximum_ciga_conversion ) # We also need the ha ciga passed to install success rate From d35d8ea8457ce128ac1fe0c51abd9f83f4e3acaa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 14:14:50 +0000 Subject: [PATCH 072/248] fixed but in eligibility counts remaining --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 91c198b1..1e2c5d92 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3177,7 +3177,7 @@ def forecast_remaining_sales(loader): ]["count"].sum() eco4_pre_ciga_remaining = eligiblity_counts_remaining[ - eligiblity_counts["ECO Eligibility"].isin( + eligiblity_counts_remaining["ECO Eligibility"].isin( ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] ) ]["count"].sum() From 680f38963a874eef548883d8f0f365f7958d42b1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 15:01:33 +0000 Subject: [PATCH 073/248] Added variance columns to output --- .../ha_15_32/ha_analysis_batch_3.py | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1e2c5d92..d4c3f74f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2859,21 +2859,30 @@ def calculate_eco4_post_ciga( eligiblity_counts["ECO Eligibility"] == "failed ciga" ]["count"].sum() + eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed + eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) eco4_confirmed = np.round(eco4_confirmed) + eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed) + if remaining_needing_ciga_check > 0: # We update the eco4 post ciga with the converted remaining eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) + eco4_remaining_forecast = np.round( eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate ) + eco4_ciga_needed_cancellations = eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast else: eco4_remaining_forecast = 0 eco4_estimated_ciga_failures = 0 + eco4_ciga_needed_cancellations = 0 eco4_post_ciga = eco4_confirmed + + eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations else: eco4_no_ciga_needed = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "eco4" @@ -2881,14 +2890,18 @@ def calculate_eco4_post_ciga( eco4_confirmed_ciga_failures = 0 # Multiply by sale conversion eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate) + eco4_no_ciga_cancellations = int(eco4_no_ciga_needed - eco4_confirmed) eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate) eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass eco4_remaining_forecast = np.round( eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate ) + eco4_ciga_cancellations = int(eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast) eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast + eco4_expected_cancellations = eco4_no_ciga_cancellations + eco4_ciga_cancellations + eco4_post_ciga = int(eco4_post_ciga) eco4_remaining_forecast = int(eco4_remaining_forecast) eco4_confirmed = int(eco4_confirmed) @@ -2912,6 +2925,9 @@ def calculate_eco4_post_ciga( ), "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate), "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate), + # Expected cencellations + "Expected cancellations - #": eco4_expected_cancellations, + "Expected cancellations - £": eco4_expected_cancellations * eco4_rate } return results @@ -3322,6 +3338,28 @@ def forecast_remaining_sales(loader): incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate ) + # Add in the variance: + # We should expect that the pre-ciga total is: + # 1) The number of post CIGA successes + + # 2) the number of CIGA failures + + # 3) The number of cancellations + variance_total = eco4_pre_ciga - ( + eco4_post_ciga_total_results["ECO4 - post CIGA - #"] + + eco4_post_ciga_total_results['Estimated total - failed CIGA'] + + eco4_post_ciga_total_results["Expected cancellations - #"] + ) + if variance_total != 0: + raise ValueError("Something went wrong in variance total") + + variance_remaining = eco4_pre_ciga_remaining - ( + eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] + + eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] + + eco4_post_ciga_remaining_results["Expected cancellations - #"] + ) + + if variance_remaining != 0: + raise ValueError("Something went wrong in variance remaining") + to_append = { ("", "", "", "HA Name"): ha_name, # ECO4 - original warmfront figures @@ -3340,6 +3378,8 @@ def forecast_remaining_sales(loader): ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, + ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total, + ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining, ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, # ECO4 - asset list, post ciga, total @@ -3382,6 +3422,13 @@ def forecast_remaining_sales(loader): ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[ "Estimated CIGA failures - £" ], + # Expected ECO4 cancellations + ("ECO4 Cancellations", "", "Expected cancellations - #", ""): eco4_post_ciga_remaining_results[ + "Expected cancellations - #" + ], + ("ECO4 Cancellations", "", "Expected cancellations - £", ""): eco4_post_ciga_remaining_results[ + "Expected cancellations - £" + ], # GBIS postcode list ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, @@ -3393,7 +3440,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 37: + if len(to_append) != 41: raise ValueError("Something went wrong") results.append(to_append) From e966dfdf6e785cbcc1e2245cce852e842d0def92 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 16:22:20 +0000 Subject: [PATCH 074/248] Adding cancellations to output --- .../ha_15_32/ha_analysis_batch_3.py | 68 +++++++++++++------ 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index d4c3f74f..09b0910e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3301,6 +3301,10 @@ def forecast_remaining_sales(loader): # For any sales surveys that are complete, that could still cancel, we apply a conversion rate eco4_actually_sold = 0 gbis_actually_sold = 0 + eco4_confirmed_cancellations = 0 + eco4_expected_cancellations = 0 + gbis_confirmed_cancellations = 0 + gbis_expected_cancellations = 0 if not survey_list.empty: surveys_with_eligibility = survey_list.merge( asset_list[["asset_list_row_id", "ECO Eligibility"]], @@ -3308,34 +3312,54 @@ def forecast_remaining_sales(loader): ) completed_eco4_sales = surveys_with_eligibility[ surveys_with_eligibility["installation_status"] == "ECO4 - installed" - ] + ].shape[0] incomplete_eco4_sales = surveys_with_eligibility[ (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") & (~surveys_with_eligibility["ECO Eligibility"].isin( ["eco4 - passed ciga"]) ) - ] + ].shape[0] incomplete_eco4_sales_ciga = surveys_with_eligibility[ (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") & (surveys_with_eligibility["ECO Eligibility"].isin( ["eco4 - passed ciga"]) ) - ] + ].shape[0] - eco4_actually_sold = (completed_eco4_sales.shape[0] * eco4_rate) + ( - incomplete_eco4_sales.shape[0] * ha_eco4_to_sale_rate + - incomplete_eco4_sales_ciga.shape[0] * ha_ciga_pass_to_sale_rate - ) * eco4_rate + eco4_confirmed_cancellations = surveys_with_eligibility[ + surveys_with_eligibility["installation_status"] == "ECO4 - cancelled" + ].shape[0] + + expected_eco4_sales_no_ciga = np.round(incomplete_eco4_sales * ha_eco4_to_sale_rate) + expected_eco4_sales_ciga = np.round(incomplete_eco4_sales_ciga * ha_ciga_pass_to_sale_rate) + + eco4_expected_cancellations = (incomplete_eco4_sales + incomplete_eco4_sales_ciga) - ( + expected_eco4_sales_no_ciga + expected_eco4_sales_ciga + ) + eco4_expected_cancellations = int(np.round(eco4_expected_cancellations)) + + eco4_actually_sold = eco4_rate * ( + completed_eco4_sales + expected_eco4_sales_no_ciga + expected_eco4_sales_ciga + ) completed_gbis_sales = surveys_with_eligibility[ surveys_with_eligibility["installation_status"] == "GBIS - installed" - ] + ].shape[0] incomplete_gbis_sales = surveys_with_eligibility[ (surveys_with_eligibility["installation_status"] == "GBIS - in progress") - ] + ].shape[0] - gbis_actually_sold = completed_gbis_sales.shape[0] * gbis_rate + ( - incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate + # Get confirmed cancellations + gbis_confirmed_cancellations = surveys_with_eligibility[ + surveys_with_eligibility["installation_status"] == "GBIS - cancelled" + ].shape[0] + + expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion + + gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales) + + gbis_actually_sold = completed_gbis_sales * gbis_rate + ( + expected_gbis_unconfirmed_sales * gbis_rate ) # Add in the variance: @@ -3381,6 +3405,9 @@ def forecast_remaining_sales(loader): ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total, ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining, ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold, + ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations, + # This is for jobs that are in-progress and could still cancel + ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, # ECO4 - asset list, post ciga, total ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"): @@ -3403,6 +3430,13 @@ def forecast_remaining_sales(loader): eco4_post_ciga_remaining_results["Of which forecast - #"], ("ECO4 post-ciga", "", "Of which forecast - £", ""): eco4_post_ciga_remaining_results["Of which forecast - £"], + # Expected ECO4 cancellations + ("ECO4 Cancellations", "", "Of which expected cancellations - #", ""): eco4_post_ciga_remaining_results[ + "Expected cancellations - #" + ], + ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[ + "Expected cancellations - £" + ], # CIGA failures ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[ 'Estimated total - failed CIGA' @@ -3422,17 +3456,13 @@ def forecast_remaining_sales(loader): ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[ "Estimated CIGA failures - £" ], - # Expected ECO4 cancellations - ("ECO4 Cancellations", "", "Expected cancellations - #", ""): eco4_post_ciga_remaining_results[ - "Expected cancellations - #" - ], - ("ECO4 Cancellations", "", "Expected cancellations - £", ""): eco4_post_ciga_remaining_results[ - "Expected cancellations - £" - ], # GBIS postcode list ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold, + ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations, + # This is for jobs that are in-progress and could still cancel + ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations, ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""): @@ -3440,7 +3470,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 41: + if len(to_append) != 45: raise ValueError("Something went wrong") results.append(to_append) From e2055b3b7dde7a1b001a568c23bb3016fbfa4079 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 19:34:43 +0000 Subject: [PATCH 075/248] fixed variance for HA6 --- .../ha_15_32/ha_analysis_batch_3.py | 135 +++++++++++++++++- 1 file changed, 129 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 09b0910e..8c9f59c2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -730,6 +730,81 @@ class DataLoader: "Post Code" ] = "ST5 7BY" + # PERFORM ADDITIONAL DROPS + # Dropping rows based on multiple conditions + conditions_to_drop = [ + (survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & ( + survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & ( + survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & ( + survey_list['NO.'].isin([16, 18, 42])) & ( + survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & ( + survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"), + (survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & ( + survey_list['NO.'].isin([56, 58])), + (survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & ( + survey_list['NO.'].isin([37, 39])), + (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & ( + survey_list['NO.'].isin([17, 6])), + (survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & ( + survey_list['NO.'].isin([10, 12])) & ( + survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & ( + survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & ( + survey_list['NO.'] == 19) + ] + + # Combine all conditions with an OR "|" + combined_condition = np.logical_or.reduce(conditions_to_drop) + + # Drop rows that meet the combined condition + survey_list = survey_list[~combined_condition] + + # Making replacements using np.where + survey_list['Post Code'] = np.where( + (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & ( + survey_list['NO.'] == 17), + "ST5 7BT", + survey_list['Post Code'] + ) + + survey_list['Post Code'] = np.where( + (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & ( + survey_list['NO.'] == 6), + "ST5 7BT", + survey_list['Post Code'] + ) + + # Maple avenue (stoke on trent, not newcastle) should be st7 1jw + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & ( + survey_list["Post Code"].str.lower() == "st7 1jx" + ), + "st7 1jw", + survey_list["Post Code"] + ) + + # Hollins Crescent should be st7 1jx + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & ( + survey_list["Post Code"].str.lower() == "st7 1jw" + ), + "st7 1jx", + survey_list["Post Code"] + ) + + # Additional drops as the above misses some: + survey_list = survey_list[ + ~((survey_list["NO."].astype(str).isin(["18", "42"])) & + (survey_list["Street / Block Name"] == "Seabridge Lane") & + (survey_list["Post Code"] == "ST5 3EY") & + (survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") & + (survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET"))) + ] + return survey_list @staticmethod @@ -1176,6 +1251,11 @@ class DataLoader: if matching_lookup.shape[0] != survey_list.shape[0]: raise ValueError("Mismatch in the number of survey rows and matching lookup rows") + matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])] + + if matching_lookup["asset_list_row_id"].duplicated().sum(): + raise ValueError("Duplicated matches in survey list") + # Merge onto the survey list survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id") @@ -1483,7 +1563,7 @@ class DataLoader: # TODO: We might have more indications of partial cancellations survey_list["installation_status"] = np.where( survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), - "partially cancelled", + "cancelled", survey_list["installation_status"] ) else: @@ -3174,6 +3254,8 @@ def forecast_remaining_sales(loader): if survey_list.empty: asset_list_remaining = asset_list.copy() else: + # For HA6, there are a small number of postcodes that do not match to any item in the asset list + survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])] asset_list_remaining = asset_list.merge( survey_list[["asset_list_row_id", "installation_status"]], how="left", @@ -3183,6 +3265,47 @@ def forecast_remaining_sales(loader): asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) + # # TODO: TEMP + # n_pre_ciga = asset_list[ + # asset_list["ECO Eligibility"].isin( + # [ + # "eco4 - passed ciga", + # "eco4 (subject to ciga)", + # "failed ciga", + # "eco4" + # ] + # ) + # ].shape[0] + # + # n_pre_ciga_remaining = asset_list_remaining[ + # asset_list_remaining["ECO Eligibility"].isin( + # [ + # "eco4 - passed ciga", + # "eco4 (subject to ciga)", + # "failed ciga", + # "eco4" + # ] + # ) + # ].shape[0] + # + # compare_to_ids = asset_list_remaining["asset_list_row_id"].values + # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids] + # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)] + # + # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] + # # cancellations = survey_list[] + # asset_list["ECO Eligibility"].value_counts() + # + # # Revenenue + # pre_ciga_revenue = n_pre_ciga * eco4_rate + # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate + # sold_revenue = n_sold * eco4_rate + # + # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue) + # # MISSING 1 SALE from sold + # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] + # # TODO: END TEMP + eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index() eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index() @@ -3402,13 +3525,13 @@ def forecast_remaining_sales(loader): ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, + ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total, ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining, ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold, - ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations, + ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate, # This is for jobs that are in-progress and could still cancel - ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations, - ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, + ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate, # ECO4 - asset list, post ciga, total ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"): eco4_post_ciga_total_results[ @@ -3460,9 +3583,9 @@ def forecast_remaining_sales(loader): ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold, - ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations, + ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate, # This is for jobs that are in-progress and could still cancel - ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations, + ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate, ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""): From 21082d8d3779a75cae422becf1a6e589ebcbaba6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 19:46:28 +0000 Subject: [PATCH 076/248] fixed duplication variance for HA16 --- .../ha_15_32/ha_analysis_batch_3.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 8c9f59c2..7859d6d2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -960,6 +960,21 @@ class DataLoader: survey_list["NO."] ) + # Delete some duplicated entries + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "york road") & + (survey_list["NO."].astype(str) == "12") & + (survey_list["Post Code"] == "M44 5HU") & + (survey_list["SUBMISSION DATE"].astype(str) == "45229")) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "peatfield avenue") & + (survey_list["NO."].astype(str) == "23") & + (survey_list["Post Code"] == "M27 9XG") & + (survey_list["SUBMISSION DATE"].astype(str) == "45236")) + ] + return survey_list @staticmethod @@ -3265,7 +3280,7 @@ def forecast_remaining_sales(loader): asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) - # # TODO: TEMP + # TODO: TEMP # n_pre_ciga = asset_list[ # asset_list["ECO Eligibility"].isin( # [ @@ -3304,6 +3319,9 @@ def forecast_remaining_sales(loader): # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue) # # MISSING 1 SALE from sold # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] + # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values + # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)] + # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']] # # TODO: END TEMP eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index() From af13467c2c4c9b7fc98e5be1e343399f57c062fb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 20:04:37 +0000 Subject: [PATCH 077/248] Added gbis variance checks --- .../ha_15_32/ha_analysis_batch_3.py | 83 ++++++++----------- 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7859d6d2..553f6271 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3237,6 +3237,7 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): + # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] @@ -3280,50 +3281,6 @@ def forecast_remaining_sales(loader): asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) - # TODO: TEMP - # n_pre_ciga = asset_list[ - # asset_list["ECO Eligibility"].isin( - # [ - # "eco4 - passed ciga", - # "eco4 (subject to ciga)", - # "failed ciga", - # "eco4" - # ] - # ) - # ].shape[0] - # - # n_pre_ciga_remaining = asset_list_remaining[ - # asset_list_remaining["ECO Eligibility"].isin( - # [ - # "eco4 - passed ciga", - # "eco4 (subject to ciga)", - # "failed ciga", - # "eco4" - # ] - # ) - # ].shape[0] - # - # compare_to_ids = asset_list_remaining["asset_list_row_id"].values - # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids] - # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)] - # - # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] - # # cancellations = survey_list[] - # asset_list["ECO Eligibility"].value_counts() - # - # # Revenenue - # pre_ciga_revenue = n_pre_ciga * eco4_rate - # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate - # sold_revenue = n_sold * eco4_rate - # - # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue) - # # MISSING 1 SALE from sold - # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] - # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values - # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)] - # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']] - # # TODO: END TEMP - eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index() eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index() @@ -3525,6 +3482,35 @@ def forecast_remaining_sales(loader): if variance_remaining != 0: raise ValueError("Something went wrong in variance remaining") + # We also check variances to make sure that the pre-CIGA ECO4 total equals + # 1) Pre CIGA remaining + + # 2) ECO4 sold + + # 3) ECO4 confirmed cancellations + + # 4) ECO4 unconfirmed cancellations + + pre_ciga_eco4_variance = ( + eco4_pre_ciga_revenue - + eco4_pre_ciga_remaining_revenue - + eco4_actually_sold - + eco4_confirmed_cancellations * eco4_rate - + eco4_expected_cancellations * eco4_rate + ) + + if pre_ciga_eco4_variance != 0: + raise ValueError("Something went wrong in pre_ciga_eco4_variance") + + # Check GBIS total variance + gbis_variance = ( + gbis_total_revenue - + gbis_actually_sold - + gbis_confirmed_cancellations * gbis_rate - + gbis_expected_cancellations * gbis_rate - + gbis_remaining_revenue + ) + + if gbis_variance != 0: + raise ValueError("Something went wrong in gbis_variance") + to_append = { ("", "", "", "HA Name"): ha_name, # ECO4 - original warmfront figures @@ -3544,8 +3530,10 @@ def forecast_remaining_sales(loader): ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, - ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total, - ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining, + ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL", ""): pre_ciga_eco4_variance, + ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL VS ELIGIBLE & INELIGIBLE", ""): variance_total, + ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 REMAINING VS ELIGIBLE & INELIGIBLE", ""): + variance_remaining, ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold, ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate, # This is for jobs that are in-progress and could still cancel @@ -3600,6 +3588,7 @@ def forecast_remaining_sales(loader): # GBIS postcode list ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, + ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance, ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold, ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate, # This is for jobs that are in-progress and could still cancel @@ -3611,7 +3600,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 45: + if len(to_append) != 47: raise ValueError("Something went wrong") results.append(to_append) From 8dcb6a9be0f903fc06e4c9dcb3218bb1d6db949e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 21:11:17 +0000 Subject: [PATCH 078/248] 11% through matching ha38 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 553f6271..6998eb4b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1054,6 +1054,17 @@ class DataLoader: 'Kingsford Court, Coombe Valley Road', 'Kingsford Court' ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + 'LESLIE TEW COURT/DERWENT ROAD', 'LESLIE TEW COURT' + ) + + # There is no 18A LESLIE TEW COURT in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "LESLIE TEW COURT") & + (survey_list["Post Code"] == "TN10 3TX") & + (survey_list["NO."] == "18A")) + ] + return survey_list @staticmethod @@ -3848,12 +3859,10 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - # priority_has = [ - # "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107", - # ] + # Add in: "HA25" # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA39", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107", ] # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], # Then: 28 [DONE], From 17b5f6e140a90d261b790fee1a4a28f43d1e3a62 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 21:42:17 +0000 Subject: [PATCH 079/248] ha38 23% merged --- .../ha_15_32/ha_analysis_batch_3.py | 50 ++++++++++++++----- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 6998eb4b..ff39b190 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1065,6 +1065,24 @@ class DataLoader: (survey_list["NO."] == "18A")) ] + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + 'Brindley House, Wellbeck Road', 'Brindley House' + ) + + # Try taking just the first part of the string, splitting on a / + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split('/').str[0].str.strip() + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + 'HUNTSMAN WAY', 'HUNTSMANS WAY' + ) + + # Try taking just the first part of the string, splitting on a , + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split(',').str[0].str.strip() + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "McCLAREN COURT", "MCLAREN COURT" + ) + return survey_list @staticmethod @@ -1228,6 +1246,10 @@ class DataLoader: if "flat" in str(house_number): house_number = house_number.split("flat")[1].strip() + # We check if we had an instance of flat x, y + if "," in str(house_number): + house_number = house_number.split(",")[0].strip() + df = df[df["matching_address"].str.contains(str(house_number))] if df.empty: @@ -1251,19 +1273,23 @@ class DataLoader: df = df[df["HouseNo"].astype(str) == str(house_number)] if df.shape[0] != 1: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] - - full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + row[ - "Town/Area"].lower().strip() + row["Post Code"].lower().strip() - # Remove any spaces from the full key - full_key = full_key.replace(" ", "") - - df = self.levenstein_match(full_key, df) - if df.shape[0] != 1: - print(row["Street / Block Name"]) - print(house_number) - print(row["Post Code"]) - raise ValueError("Investigate") + if "Town/Area" not in row.keys(): + full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + + row["Post Code"].lower().strip()) + else: + full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + \ + row["Town/Area"].lower().strip() + row["Post Code"].lower().strip() + # Remove any spaces from the full key + full_key = full_key.replace(" ", "") + + df = self.levenstein_match(full_key, df) + + if df.shape[0] != 1: + print(row["Street / Block Name"]) + print(house_number) + print(row["Post Code"]) + raise ValueError("Investigate") matching_lookup.append( { From 8e258ff3ca164e2eddcd9cc74d1e7531bf655e4f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 22:29:18 +0000 Subject: [PATCH 080/248] 44% through matching --- .../ha_15_32/ha_analysis_batch_3.py | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ff39b190..567394a4 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1083,6 +1083,70 @@ class DataLoader: "McCLAREN COURT", "MCLAREN COURT" ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS" + ) + + survey_list["Street / Block Name"] = np.where( + ((survey_list["NO."].isin( + [ + "FLAT 1 22", + "FLAT 2 22", + "FLAT 3 22", + "FLAT 4 22", + "FLAT 5 22", + "FLAT 6 22", + ] + )) & + (survey_list["Street / Block Name"] == "MELTON ROAD")), + "22 MELTON ROAD", + survey_list["Street / Block Name"] + ) + + survey_list["Street / Block Name"] = np.where( + ((survey_list["NO."].isin( + [ + "FLAT 1 24", + "FLAT 2 24", + "FLAT 3 24", + "FLAT 4 24", + "FLAT 5 24", + "FLAT 6 24", + ] + )) & + (survey_list["Street / Block Name"] == "MELTON ROAD")), + "24 MELTON ROAD", + survey_list["Street / Block Name"] + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT" + ) + + # Turret green court flat 1 doesn't exist in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") & + (survey_list["NO."] == 1)) + ] + # 3, 45 raywell steet doesn't exist in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") & + (survey_list["NO."] == 3)) + ] + + # 40 Avondale drive doesn't exist in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Avondale Drive") & + (survey_list["NO."] == 40)) + ] + # 17A beech road has the wrong postcode + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == "BEECH ROAD") & + (survey_list["Post Code"] == "DH6 1JD"), + "DH6 1JB", + survey_list["Post Code"] + ) + return survey_list @staticmethod @@ -1250,6 +1314,10 @@ class DataLoader: if "," in str(house_number): house_number = house_number.split(",")[0].strip() + # We may also have a space for an instance of flat x y + if " " in str(house_number): + house_number = house_number.split(" ")[0].strip() + df = df[df["matching_address"].str.contains(str(house_number))] if df.empty: @@ -1270,7 +1338,7 @@ class DataLoader: raise ValueError("Investigate") if df.shape[0] != 1: - df = df[df["HouseNo"].astype(str) == str(house_number)] + df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] if df.shape[0] != 1: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] if df.shape[0] != 1: From 067a66c1b172b63abc419a112525382ce7c2baa3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 22:45:22 +0000 Subject: [PATCH 081/248] ha38 wip - leaving for now --- .../ha_15_32/ha_analysis_batch_3.py | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 567394a4..c4f6307c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -599,7 +599,52 @@ class DataLoader: asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat) asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo'] asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat) - # We then need to + + # We update a few specific rows + asset_list["HouseNo"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/ROOM A1", + "10 SOUTH VIEW/ROOM A2", + "10 SOUTH VIEW/ROOM A3", + ] + )), + "10A", + asset_list["HouseNo"] + ) + + asset_list["matching_address"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/ROOM A1", + ] + )), + "10a, 10 south view/room a1, spennymoor, co. durham, dl16 7df'", + asset_list["matching_address"] + ) + + asset_list["HouseNo"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/ROOM B1", + "10 SOUTH VIEW/ROOM B2", + "10 SOUTH VIEW/ROOM B3", + "10 SOUTH VIEW/ROOM B4", + ] + )), + "10B", + asset_list["HouseNo"] + ) + + asset_list["matching_address"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/ROOM B1", + ] + )), + "10b, 10 south view/room b1, spennymoor, co. durham, dl16 7df", + asset_list["matching_address"] + ) return asset_list @@ -1147,6 +1192,13 @@ class DataLoader: survey_list["Post Code"] ) + survey_list["Street / Block Name"] = np.where( + (survey_list["Street / Block Name"] == "SOUTHVIEW") & + (survey_list["Post Code"] == "DL16 7DF"), + "SOUTH VIEW", + survey_list["Street / Block Name"] + ) + return survey_list @staticmethod From 5c3f6320dd6bfc2ddaac4fefb8786646c50e7945 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 10:42:51 +0000 Subject: [PATCH 082/248] 29% through matching eco3 ha25 --- .../ha_15_32/ha_analysis_batch_3.py | 136 +++++++++++++++--- 1 file changed, 117 insertions(+), 19 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index c4f6307c..3ea9649e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -183,7 +183,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]: + if ha_name in ["HA1", "HA6", "HA16", "HA24"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -214,6 +214,14 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA25": + asset_list["matching_address"] = asset_list[ + self.COLUMN_CONFIG[ha_name]["address"] + ].astype(str).str.lower().str.strip() + + asset_list["matching_postcode"] = asset_list['matching_address'].apply( + lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x + ) elif ha_name == "HA28": asset_list["matching_address"] = ( asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + @@ -352,6 +360,9 @@ class DataLoader: house_numbers = house_numbers.iloc[:, 0:1] house_numbers.columns = ['HouseNo'] + # Remove trailing punctuation such as , or ; + house_numbers["HouseNo"] = house_numbers["HouseNo"].str.rstrip(',;') + asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1) return asset_list @@ -425,27 +436,16 @@ class DataLoader: workbook = openpyxl.load_workbook(filepath) asset_sheetname = self.get_asset_sheetname(workbook) - # TODO: TEMP - sheetnames_lower = [x.lower() for x in workbook.sheetnames] - if any("eco3" in x for x in sheetnames_lower): - raise Exception("REMOVE ME") - asset_sheet = workbook[asset_sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] if ha_name == "HA25": asset_sheet_colnames[11] = "matching_postcode" - values_only = not ha_name != "HA25" - rows_data = [] - if not values_only: - for row in asset_sheet.iter_rows(min_row=2, values_only=values_only): - row_data = [cell.value for cell in row] # This will get you the cell values - rows_data.append(row_data) - else: - for row in asset_sheet.iter_rows(min_row=2, values_only=values_only): # use values_only=True to get values - row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values - rows_data.append(row_data) + + for row in asset_sheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + rows_data.append(row_data) asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames) @@ -477,6 +477,29 @@ class DataLoader: if ha_name in ["HA1", "HA25"]: return asset_list, pd.DataFrame(), pd.DataFrame() + # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be + # suitable under ECO4, since their walls will be filled + eco3_list = pd.DataFrame() + sheetnames_lower = [x.lower() for x in workbook.sheetnames] + eco3_sheetname_index = [i for i, x in enumerate(sheetnames_lower) if "eco3" in x.replace(" ", "")] + if eco3_sheetname_index: + eco3_sheetname = workbook.sheetnames[eco3_sheetname_index[0]] + eco3_sheet = workbook[eco3_sheetname] + eco3_rows = [] + for row in eco3_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers + row_data = [cell.value for cell in row] # This will get you the cell values + eco3_rows.append(row_data) + + eco3_list = pd.DataFrame(eco3_rows, columns=[cell.value for cell in eco3_sheet[1]]) + # Remove columns that are None + eco3_list = eco3_list.loc[:, eco3_list.columns.notnull()] + # Remove rows that are completely empty + eco3_list = eco3_list.loc[eco3_list.loc[:, eco3_list.columns].notnull().any(axis=1)] + eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))] + + # Perform the eco3 merge + eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name) + # We check if there is a survey list survey_sheetname = self.get_survey_sheetname(workbook) survey_sheet = workbook[survey_sheetname] @@ -518,7 +541,7 @@ class DataLoader: ciga_list = self.dedupe_ciga_list(ciga_list) ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) - return asset_list, survey_list, ciga_list + return asset_list, survey_list, ciga_list, eco3_list @staticmethod def correct_ha6_asset_list(asset_list): @@ -1433,6 +1456,79 @@ class DataLoader: return survey_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): + + # We add on a matching postcode without spaces for this + # asset_list["matching_postcode_no_space"] = asset_list["matching_postcode"].str.lower().str.replace(" ", "") + + # May need an eco3 list correction function + + # NEADS DRIVE, postcode with bs305dt, is not found in the asset list + eco3_list = eco3_list[ + ~(eco3_list["Post Code"] == "BS305DT") + ] + # Drop rows with missings postcode + eco3_list = eco3_list[ + ~pd.isnull(eco3_list["Post Code"]) + ] + + missed_postcodes = [] + if ha_name == "HA25": + missed_postcodes = { + postcode.lower() for postcode in eco3_list["Post Code"] if + postcode.lower() not in asset_list["matching_postcode"].values + } + eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)] + + matching_lookup = [] + missed = [] + for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)): + + postcode = row["Post Code"].lower().strip() + + # df will never be empty, since we've already done a check for common postcodes + df = asset_list[ + asset_list["matching_postcode"].str.contains(postcode) + ] + + house_number = row["NO "] + if isinstance(house_number, str): + house_number = house_number.lower().strip() + + if not any(df["matching_address"].str.contains(str(house_number))): + if "flat" in str(house_number): + house_number = house_number.split("flat")[1].strip() + + # We check if we had an instance of flat x, y + if "," in str(house_number): + house_number = house_number.split(",")[0].strip() + + # We may also have a space for an instance of flat x y + if " " in str(house_number): + house_number = house_number.split(" ")[0].strip() + + df = df[df["matching_address"].str.contains(str(house_number))] + + if df.empty: + missed.append(row["eco3_list_row_id"]) + continue + + if df.shape[0] != 1: + df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] + + if df.shape[0] != 1: + print(row["Street / Block Name"]) + print(house_number) + print(row["Post Code"]) + raise ValueError("Investigate") + + matching_lookup.append( + { + "eco3_list_row_id": row["eco3_list_row_id"], + "asset_list_row_id": df["asset_list_row_id"].values[0], + } + ) + @staticmethod def extract_streetname(address, house_number=None, postcode=None): """ @@ -4008,11 +4104,13 @@ def app(): # Add in: "HA25" # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107", ] # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], # Then: 28 [DONE], - # 38, 41, 10, 14, 20, 48 + # 41, 10, 14 [DONE], 20, 48, 50 + # 38[problematic, but no ECO4] + # TODO - do 50 and 25 next # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From ef77db10373c653e28c82265460ce9fd3bf3f3bf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 10:56:27 +0000 Subject: [PATCH 083/248] HA25 eco3 matching 91% complete --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3ea9649e..ea5b0456 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1516,6 +1516,15 @@ class DataLoader: if df.shape[0] != 1: df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] + if df.empty: + missed.append(row["eco3_list_row_id"]) + continue + + if df.shape[0] != 1: + # Perform a search on streetname + street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0] + df = df[df["matching_address"].str.contains(street_name_section1)] + if df.shape[0] != 1: print(row["Street / Block Name"]) print(house_number) From 022244377d36557f83081e505b8068ab2bd98004 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 12:26:16 +0000 Subject: [PATCH 084/248] working on fixing missed matched in eco3 matching --- .../ha_15_32/ha_analysis_batch_3.py | 84 +++++++++++++++---- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ea5b0456..a5845990 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -171,6 +171,10 @@ class DataLoader: "HA107": 51, } + UNMATCHED_ECO3 = { + "HA25": 94 + } + def __init__(self, directories, december_figures_filepath, use_cache, rebuild): self.directories = directories self.use_cache = use_cache @@ -1458,9 +1462,6 @@ class DataLoader: def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): - # We add on a matching postcode without spaces for this - # asset_list["matching_postcode_no_space"] = asset_list["matching_postcode"].str.lower().str.replace(" ", "") - # May need an eco3 list correction function # NEADS DRIVE, postcode with bs305dt, is not found in the asset list @@ -1471,8 +1472,17 @@ class DataLoader: eco3_list = eco3_list[ ~pd.isnull(eco3_list["Post Code"]) ] + # We have a bunch of genuine duplicates + eco3_list = eco3_list.drop_duplicates(["NO ", "Street / Block Name", "Post Code"]) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "HALWILL MEADOOW", "HALWILL MEADOW" + ) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "Hall Road", "Hall Rd" + ) - missed_postcodes = [] if ha_name == "HA25": missed_postcodes = { postcode.lower() for postcode in eco3_list["Post Code"] if @@ -1480,10 +1490,18 @@ class DataLoader: } eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)] + # For the asset list, we create a matching address without any punctuation + # TODO: We should generally just remove puncutation from addresses when matching + asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(r'[^\w\s]', '', + regex=True) + # Remove double spaces + asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace( + " ", " " + ) + matching_lookup = [] missed = [] for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)): - postcode = row["Post Code"].lower().strip() # df will never be empty, since we've already done a check for common postcodes @@ -1507,24 +1525,20 @@ class DataLoader: if " " in str(house_number): house_number = house_number.split(" ")[0].strip() - df = df[df["matching_address"].str.contains(str(house_number))] + # We must do the house number filter + df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] + + # Perform a search on streetname + # We do this to prevent duplicate matches to properties with the same postcode and house number, + # but different streets + street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0] + street_name_section1 = re.sub(r'[^\w\s]', '', street_name_section1) + df = df[df["matching_address_no_punctuation"].str.contains(street_name_section1)] if df.empty: missed.append(row["eco3_list_row_id"]) continue - if df.shape[0] != 1: - df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] - - if df.empty: - missed.append(row["eco3_list_row_id"]) - continue - - if df.shape[0] != 1: - # Perform a search on streetname - street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0] - df = df[df["matching_address"].str.contains(street_name_section1)] - if df.shape[0] != 1: print(row["Street / Block Name"]) print(house_number) @@ -1538,6 +1552,40 @@ class DataLoader: } ) + # We verify the missed + # -HA25 contains 88 missed entries. These are actually 8 unique postcodes, where surveys were conducted + # on properties that had house numbers outside of the asset list + if len(missed) != self.UNMATCHED_ECO3[ha_name]: + raise ValueError( + f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" + ) + + # TODO: 194 missed + + matching_lookup = pd.DataFrame(matching_lookup) + # Check dupes as this will cause problems later on + if matching_lookup["asset_list_row_id"].duplicated().any(): + raise ValueError("Duplicated asset list row ids") + + missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] + missed_df.head(3).tail(1)["eco3_list_row_id"] + + duped_ids = matching_lookup[matching_lookup["asset_list_row_id"].duplicated()]["asset_list_row_id"].tolist() + duped_df = matching_lookup[ + matching_lookup["asset_list_row_id"].isin(duped_ids) + ] + duped_surveys = eco3_list[ + eco3_list["eco3_list_row_id"].isin(duped_df["eco3_list_row_id"].values) + ].copy() + + duped_surveys = duped_surveys.merge(matching_lookup, how="left", on="eco3_list_row_id") + + duped_surveys[ + ["NO ", "Street / Block Name", "Post Code", "eco3_list_row_id", "asset_list_row_id"] + ].sort_values("asset_list_row_id").head() + + asset_list[asset_list["asset_list_row_id"] == "HA2515145"]["matching_address"].values + @staticmethod def extract_streetname(address, house_number=None, postcode=None): """ From b09bd63b53c8d9b14f11c1c5b7cb38b28c63afbc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 12:53:25 +0000 Subject: [PATCH 085/248] done with ha25 matching for now --- .../ha_15_32/ha_analysis_batch_3.py | 66 +++++++++++-------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a5845990..f0813aef 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -172,7 +172,7 @@ class DataLoader: } UNMATCHED_ECO3 = { - "HA25": 94 + "HA25": 119 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -478,7 +478,7 @@ class DataLoader: # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga # lists, and so # we can return the asset list now - if ha_name in ["HA1", "HA25"]: + if ha_name in ["HA1"]: return asset_list, pd.DataFrame(), pd.DataFrame() # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be @@ -1460,10 +1460,8 @@ class DataLoader: return survey_list - def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): - - # May need an eco3 list correction function - + @staticmethod + def correct_ha25_eco3_list(eco3_list): # NEADS DRIVE, postcode with bs305dt, is not found in the asset list eco3_list = eco3_list[ ~(eco3_list["Post Code"] == "BS305DT") @@ -1483,6 +1481,29 @@ class DataLoader: "Hall Road", "Hall Rd" ) + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "SPRINGFIELD WAY SAINT DAY", "SPRINGFIELD WAY ST DAY" + ) + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "BOND SPEAR COURT", "BOND-SPEAR COURT" + ) + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "ST.MARYS HILL", "ST MARYS HILL" + ) + # Correct the postcode for edmund road + eco3_list["Post Code"] = np.where( + (eco3_list["Street / Block Name"] == "EDMUND ROAD") & + (eco3_list["Post Code"] == "TR14 8QJ"), + "TR15 1BY", + eco3_list["Post Code"] + ) + return eco3_list + + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): + + eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") + eco3_list = eco3_list_correction_function(eco3_list) + if ha_name == "HA25": missed_postcodes = { postcode.lower() for postcode in eco3_list["Post Code"] if @@ -1492,8 +1513,9 @@ class DataLoader: # For the asset list, we create a matching address without any punctuation # TODO: We should generally just remove puncutation from addresses when matching - asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(r'[^\w\s]', '', - regex=True) + asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace( + r'[^\w\s]', '', regex=True + ) # Remove double spaces asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace( " ", " " @@ -1502,6 +1524,8 @@ class DataLoader: matching_lookup = [] missed = [] for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)): + # if row["eco3_list_row_id"] == "HA25_Eco3_5422": + # raise Exception() postcode = row["Post Code"].lower().strip() # df will never be empty, since we've already done a check for common postcodes @@ -1553,38 +1577,24 @@ class DataLoader: ) # We verify the missed - # -HA25 contains 88 missed entries. These are actually 8 unique postcodes, where surveys were conducted - # on properties that had house numbers outside of the asset list + # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2 + # where many surveys were conducted on house numbers, not in the asset list if len(missed) != self.UNMATCHED_ECO3[ha_name]: raise ValueError( f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) - # TODO: 194 missed - matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on if matching_lookup["asset_list_row_id"].duplicated().any(): raise ValueError("Duplicated asset list row ids") - missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] - missed_df.head(3).tail(1)["eco3_list_row_id"] + # Merge onto eco3 list + eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id") - duped_ids = matching_lookup[matching_lookup["asset_list_row_id"].duplicated()]["asset_list_row_id"].tolist() - duped_df = matching_lookup[ - matching_lookup["asset_list_row_id"].isin(duped_ids) - ] - duped_surveys = eco3_list[ - eco3_list["eco3_list_row_id"].isin(duped_df["eco3_list_row_id"].values) - ].copy() + asset_list = asset_list.drop(columns=["matching_address_no_punctuation"]) - duped_surveys = duped_surveys.merge(matching_lookup, how="left", on="eco3_list_row_id") - - duped_surveys[ - ["NO ", "Street / Block Name", "Post Code", "eco3_list_row_id", "asset_list_row_id"] - ].sort_values("asset_list_row_id").head() - - asset_list[asset_list["asset_list_row_id"] == "HA2515145"]["matching_address"].values + return eco3_list @staticmethod def extract_streetname(address, house_number=None, postcode=None): From 961b53d523bf7dc82d9e83459861cb3aa2865c93 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 12:58:29 +0000 Subject: [PATCH 086/248] Adding return for HA25 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index f0813aef..7ad50583 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -479,7 +479,7 @@ class DataLoader: # lists, and so # we can return the asset list now if ha_name in ["HA1"]: - return asset_list, pd.DataFrame(), pd.DataFrame() + return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be # suitable under ECO4, since their walls will be filled @@ -504,6 +504,10 @@ class DataLoader: # Perform the eco3 merge eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name) + if ha_name in ["HA25"]: + # Accomodate ha25 unique structure + return asset_list, pd.DataFrame(), pd.DataFrame(), eco3_list + # We check if there is a survey list survey_sheetname = self.get_survey_sheetname(workbook) survey_sheet = workbook[survey_sheetname] @@ -1592,7 +1596,7 @@ class DataLoader: # Merge onto eco3 list eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id") - asset_list = asset_list.drop(columns=["matching_address_no_punctuation"]) + asset_list.drop(columns=["matching_address_no_punctuation"], inplace=True) return eco3_list @@ -1756,7 +1760,7 @@ class DataLoader: continue # Load asset list logger.info("Loading data for {}".format(ha_name)) - asset_list, survey_list, ciga_list = self.load_asset_list( + asset_list, survey_list, ciga_list, eco3_list = self.load_asset_list( filepath=filepath, ha_name=ha_name, ) @@ -1764,7 +1768,8 @@ class DataLoader: data[ha_name] = { "asset_list": asset_list, "survey_list": survey_list, - "ciga_list": ciga_list + "ciga_list": ciga_list, + "eco3_list": eco3_list } self.data = data From 7f88f0e0f59e584d82a6799671e8f1a64a034392 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 13:59:32 +0000 Subject: [PATCH 087/248] Added in the re-labelling of assets based on eco3 merge --- .../ha_15_32/ha_analysis_batch_3.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7ad50583..21509923 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1812,6 +1812,7 @@ class DataLoader: asset_list = data_assets["asset_list"].copy() survey_list = data_assets["survey_list"].copy() ciga_list = data_assets["ciga_list"].copy() + eco3_list = data_assets.get("eco3_list", pd.DataFrame()) asset_list_starting_size = asset_list.shape[0] @@ -1859,6 +1860,25 @@ class DataLoader: if asset_list.shape[0] != asset_list_starting_size: raise ValueError("The asset list has changed in size") + # If we have eco3 surveys, we set a property to not eligible + if not eco3_list.empty: + eco3_list_to_merge = eco3_list[["asset_list_row_id"]].copy() + eco3_list_to_merge["has_eco3"] = True + asset_list = asset_list.merge( + eco3_list_to_merge, how="left", on="asset_list_row_id" + ) + + if asset_list.shape[0] != asset_list_starting_size: + raise ValueError("The asset list has changed in size, when merging on eco3") + + # Any rows that have an eco3 survey are set to not eligible + asset_list["ECO Eligibility"] = np.where( + asset_list["has_eco3"] == True, + "not eligible", + asset_list["ECO Eligibility"] + ) + asset_list = asset_list.drop(columns=["has_eco3"]) + # Report on sales sales_report = {} if not survey_list.empty: From 9a0c6c3e8fbae7a23980aa7e75912ef6202ab29d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 14:18:08 +0000 Subject: [PATCH 088/248] expanded eco3 matching --- .../ha_15_32/ha_analysis_batch_3.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 21509923..06bb0d96 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -172,7 +172,7 @@ class DataLoader: } UNMATCHED_ECO3 = { - "HA25": 119 + "HA25": 154 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -1508,12 +1508,16 @@ class DataLoader: eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") eco3_list = eco3_list_correction_function(eco3_list) + asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower() + eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "") + if ha_name == "HA25": + # 317 -> 259 missed_postcodes = { - postcode.lower() for postcode in eco3_list["Post Code"] if - postcode.lower() not in asset_list["matching_postcode"].values + postcode for postcode in eco3_list["postcode_no_space"] if + postcode not in asset_list["matching_postcode_nospace"].values } - eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)] + eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)] # For the asset list, we create a matching address without any punctuation # TODO: We should generally just remove puncutation from addresses when matching @@ -1530,11 +1534,11 @@ class DataLoader: for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)): # if row["eco3_list_row_id"] == "HA25_Eco3_5422": # raise Exception() - postcode = row["Post Code"].lower().strip() + postcode = row["postcode_no_space"] # df will never be empty, since we've already done a check for common postcodes df = asset_list[ - asset_list["matching_postcode"].str.contains(postcode) + asset_list["matching_postcode_nospace"].str.contains(postcode) ] house_number = row["NO "] @@ -1588,6 +1592,8 @@ class DataLoader: f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) + # 154 missed, 2827 matched for HA 25 + matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on if matching_lookup["asset_list_row_id"].duplicated().any(): From 8b70fb346c0ce51acd24b245bbbecedeaa10d30c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:00:51 +0000 Subject: [PATCH 089/248] matching ha50 --- .../ha_15_32/ha_analysis_batch_3.py | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 06bb0d96..4708bf35 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -172,7 +172,8 @@ class DataLoader: } UNMATCHED_ECO3 = { - "HA25": 154 + "HA25": 154, + "HA50": 5 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -262,6 +263,10 @@ class DataLoader: asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \ asset_list["post_code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip() + elif ha_name == "HA50": + asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Post Code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ @@ -433,6 +438,8 @@ class DataLoader: return "ECO Surveys" elif "ECO Survey" in workbook.sheetnames: return "ECO Survey" + elif "ECO 4 Surveys completed" in workbook.sheetnames: + return "ECO 4 Surveys completed" else: return "ECO surveys" @@ -1289,6 +1296,34 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha50_survey_list(survey_list): + + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == 'COSELEY STREET') & + (survey_list["Post Code"] == 'ST16 1LR'), + "ST6 1JU", + survey_list["Post Code"] + ) + + # Remove some of COSELEY STREET, as we have surveys done, outside of the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "COSELEY STREET") & + (survey_list["Post Code"] == "ST6 1JU") & + (survey_list["NO."].isin([96]))) + ] + + survey_list["Post Code"] = survey_list["Post Code"].str.replace("ST33JZ", "ST3 3JZ") + + # Remove some of Jesmond drive as we have surveys done outside of the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Jesmond Drive") & + (survey_list["Post Code"] == "ST3 3JZ") & + (survey_list["NO."].isin([29]))) + ] + + return survey_list + @staticmethod def correct_ha107_survey_list(survey_list): # Replace Front Street, East Stockham with Front Street, East Stockwith @@ -1503,6 +1538,10 @@ class DataLoader: ) return eco3_list + @staticmethod + def correct_ha50_eco3_list(eco3_list): + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -1517,6 +1556,7 @@ class DataLoader: postcode for postcode in eco3_list["postcode_no_space"] if postcode not in asset_list["matching_postcode_nospace"].values } + eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)] # For the asset list, we create a matching address without any punctuation @@ -4199,16 +4239,18 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - # Add in: "HA25" + # Add in: # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107", ] - # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], + # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come + # back on this], # Then: 28 [DONE], - # 41, 10, 14 [DONE], 20, 48, 50 - # 38[problematic, but no ECO4] - # TODO - do 50 and 25 next + # 41, 48, 50 + # 38[problematic, but no ECO4], 10 problematic (no eligibility), + # 20 has barely any in + # TODO - do 50 # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From 3001a98421b377cb31e2c3b667528e8d4b80a150 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:02:23 +0000 Subject: [PATCH 090/248] ha50 30% matched --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 4708bf35..901784e1 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1322,6 +1322,10 @@ class DataLoader: (survey_list["NO."].isin([29]))) ] + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BRUNDELL OVAL", "BRUNDALL OVAL" + ) + return survey_list @staticmethod From 4afd012e51bfc3b366dc1e8d1f70281bb1097bd0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:14:53 +0000 Subject: [PATCH 091/248] ha50 51% matched --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 901784e1..bde6f647 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1326,6 +1326,13 @@ class DataLoader: "BRUNDELL OVAL", "BRUNDALL OVAL" ) + # Remove 4 Linden Place + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Linden Place") & + (survey_list["Post Code"] == "ST3 3AT") & + (survey_list["NO."].isin([4]))) + ] + return survey_list @staticmethod From 1146f34eba62ab2b00f610502b17ba6f9425cf43 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:24:20 +0000 Subject: [PATCH 092/248] matching 81% complete --- .../ha_15_32/ha_analysis_batch_3.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bde6f647..818f6e4f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1333,6 +1333,45 @@ class DataLoader: (survey_list["NO."].isin([4]))) ] + # Remove 11 Tilehurst Place + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Tilehurst Place") & + (survey_list["Post Code"] == "ST3 3AP") & + (survey_list["NO."].isin([11]))) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "deavile road", "DEAVILLE ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "WOOLISCROFT ROAD", "WOOLLISCROFT ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Leak Road", "Leek Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Springfield road", "Springfields road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "MILLWARD RD", "MILLWARD ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "REPINGTON RD", "REPINGTON ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ECCELSTONE PLACE", "ECCLESTONE PLACE" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "St. James Place", "St James Place" + ) + return survey_list @staticmethod From 5a1aa3995221ddf125b25c6d619165fdbcab37ff Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:33:26 +0000 Subject: [PATCH 093/248] ha50 93% complete --- .../ha_15_32/ha_analysis_batch_3.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 818f6e4f..3b9bd7ca 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1372,6 +1372,50 @@ class DataLoader: "St. James Place", "St James Place" ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "CHELL HEATH RD", "CHELL HEATH ROAD" + ) + # Correct postcode + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == 'CHELL HEATH ROAD') & + (survey_list["Post Code"] == 'ST6 6HU'), + "ST6 6HJ", + survey_list["Post Code"] + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Franklin Rd", "Franklin Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Lodge Rd", "Lodge Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "St Matthews Street", "St Matthew Street" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Grove Bank Road", "Grovebank Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "OVERSLEY RD", "OVERSLEY ROAD" + ) + + # Replace all of the " RD" with " ROAD" + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + " RD", " ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "St. Georges Crescent", "St Georges Crescent" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Tewson Road", "Tewson Green" + ) + return survey_list @staticmethod From d4e378f109deb3c71b87165309a5935b3641a915 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:40:37 +0000 Subject: [PATCH 094/248] ha50 matching complete subject to checks --- .../ha_15_32/ha_analysis_batch_3.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3b9bd7ca..a5b99a72 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1416,6 +1416,35 @@ class DataLoader: "Tewson Road", "Tewson Green" ) + # Remove 55 Seabridge Lane + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Seabridge Lane") & + (survey_list["Post Code"] == "ST5 4AG") & + (survey_list["NO."].isin([55]))) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Tyne Way") & + (survey_list["Post Code"] == "ST5 4AX") & + (survey_list["NO."].isin([56]))) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "St.Bernards Place", "St Bernard Place" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Penarth Road", "Penarth Grove" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "St. Marys Road", "St Marys Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Larch Drive", "Larch Grove" + ) + return survey_list @staticmethod From 33b3f51ca4701ede548e6af82f80ae191a3c0710 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:54:40 +0000 Subject: [PATCH 095/248] handling dupes for ha50 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a5b99a72..7124919e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1445,6 +1445,21 @@ class DataLoader: "Larch Drive", "Larch Grove" ) + # Drop 31 Lauder place north, as there is a duplicate. THis version also has a wrong postcode + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") & + (survey_list["Post Code"] == "ST20QS") & + (survey_list["NO."].isin([31]))) + ] + + # Handle dropping of dupes + survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "") + survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "") + + # Should go to 18 + survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"]) + survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"]) + return survey_list @staticmethod From 23eaa5600118f0df54667ea36422153158db8dd5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:57:00 +0000 Subject: [PATCH 096/248] checked ha50 ciga merge --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7124919e..2feded98 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -168,6 +168,7 @@ class DataLoader: "HA15": 3, "HA16": 7, "HA24": 12, + "HA50": 4, "HA107": 51, } @@ -429,6 +430,8 @@ class DataLoader: return "CIGA checks" elif "CIGA check" in workbook.sheetnames: return "CIGA check" + elif "CIGA requested" in workbook.sheetnames: + return "CIGA requested" else: return "CIGA" From 180c0c53eaa48c185c75cf22aee448aac91bbe30 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 16:26:58 +0000 Subject: [PATCH 097/248] done with ha50 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 2feded98..0720a686 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1982,7 +1982,8 @@ class DataLoader: "ECO4 GBIS (ECO+)": "GBIS", "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS", "ECO4 AFFORDABLE WARMTH": "ECO4", - "Affordable Warmth": "ECO4" + "Affordable Warmth": "ECO4", + "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS", } eco_eligibility_map = { From c43349a5777326145107a6406779eadcdc6e9dab Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 16:39:47 +0000 Subject: [PATCH 098/248] Added ha41 matching --- .../ha_15_32/ha_analysis_batch_3.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 0720a686..4cf447aa 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -174,7 +174,8 @@ class DataLoader: UNMATCHED_ECO3 = { "HA25": 154, - "HA50": 5 + "HA41": 26, + "HA50": 5, } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -264,6 +265,14 @@ class DataLoader: asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \ asset_list["post_code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip() + elif ha_name == "HA41": + asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["AddressLine3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["AddressLine4"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA50": asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Post Code"].astype(str).str.lower().str.strip() @@ -1683,6 +1692,10 @@ class DataLoader: def correct_ha50_eco3_list(eco3_list): return eco3_list + @staticmethod + def correct_ha41_eco3_list(eco3_list): + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -4384,15 +4397,14 @@ def app(): # Add in: # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA50", "HA107", ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], # Then: 28 [DONE], # 41, 48, 50 - # 38[problematic, but no ECO4], 10 problematic (no eligibility), - # 20 has barely any in - # TODO - do 50 + # Ignore for now: + # TODO: 38[problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From c4af2251f4fac0af95676b7158e5baf1ad9d3d3c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 16:41:58 +0000 Subject: [PATCH 099/248] data load for ha41 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 4cf447aa..c2d585a2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -452,6 +452,8 @@ class DataLoader: return "ECO Survey" elif "ECO 4 Surveys completed" in workbook.sheetnames: return "ECO 4 Surveys completed" + elif "ECO4 Surveys" in workbook.sheetnames: + return "ECO4 Surveys" else: return "ECO surveys" @@ -1533,6 +1535,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha41_survey_list(survey_list): + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() From ae714e42a62b1e6def566c6de46b34035d0ab7bb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 17:11:44 +0000 Subject: [PATCH 100/248] identified 9 additional has worth analysing --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index c2d585a2..b22ea273 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -4403,14 +4403,16 @@ def app(): # Add in: # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA50", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48", + "HA50", "HA107", ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], - # Then: 28 [DONE], - # 41, 48, 50 + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], + # 48 [WIP], + # Consider for ECO4: 2, 63, 12, 13, 136, 117 + # COnsider for GBIS: 56, 35, 34 # Ignore for now: - # TODO: 38[problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in + # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has] From c84be65e8defa04aa1453f80b53d073c9011a629 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 19:52:08 +0000 Subject: [PATCH 101/248] ha48 ciga unmatched count added --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index b22ea273..56867ef7 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -159,6 +159,10 @@ class DataLoader: "HA25": { "address": "T1_Address", "postcode": "matching_postcode" + }, + "HA48": { + "address": "Full Address", + "postcode": "Postcode" } } @@ -170,6 +174,7 @@ class DataLoader: "HA24": 12, "HA50": 4, "HA107": 51, + "HA48": 0 } UNMATCHED_ECO3 = { @@ -190,7 +195,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA16", "HA24"]: + if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA48"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() From c3fd2ae902bd96250bc5ca376a424ebc8cbc3335 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 20:58:47 +0000 Subject: [PATCH 102/248] Adding HA2, data load done --- .../ha_15_32/ha_analysis_batch_3.py | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 56867ef7..74c6d3f5 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -167,6 +167,7 @@ class DataLoader: } UNMATCHED_CIGA = { + "HA2": 0, "HA6": 117, "HA14": 3, "HA15": 3, @@ -202,6 +203,12 @@ class DataLoader: asset_list["matching_postcode"] = asset_list[ self.COLUMN_CONFIG[ha_name]["postcode"] ].astype(str).str.lower().str.strip() + elif ha_name == "HA2": + # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode + asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA7": # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \ @@ -3794,7 +3801,6 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): - # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] @@ -4074,13 +4080,13 @@ def forecast_remaining_sales(loader): ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4, ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4, ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue, - ("ECO4 original", "", "Sold - £", ""): original_warmfront_sold_eco4, + ("ECO4 original", "", "Sold or cancelled - £", ""): original_warmfront_sold_eco4, ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue, # GBIS - original warmfront figures ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis, ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis, ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue, - ("GBIS original", "", "Sold - £", ""): original_warmfront_sold_gbis, + ("GBIS original", "", "Sold or cancelled - £", ""): original_warmfront_sold_gbis, ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue, # ECO4 - asset list, pre-ciga ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, @@ -4237,12 +4243,17 @@ def forecast_remaining_sales(loader): headline_total_delta = round(headline_total_delta, 1) headline_eco4_sold_since_november = ( - totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] - totals_row[('ECO4 original', '', 'Sold - £', '')] + totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] + + totals_row[('ECO4 pre-ciga', '', 'Confirmed cancellations - £', '')] + # confirmed canclleations + totals_row[('ECO4 pre-ciga', '', 'Unconfirmed cancellations - £', '')] - # expected cancellations + totals_row[('ECO4 original', '', 'Sold or cancelled - £', '')] ) headline_gbis_sold_since_november = ( - totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] - - totals_row[('GBIS original', '', 'Sold - £', '')] + totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] + + totals_row[("GBIS Postcode list", "", "Confirmed cancellations - £", "")] + # confirmed cancellations + totals_row[("GBIS Postcode list", "", "Unconfirmed cancellations - £", "")] - # expected cancellations + totals_row[('GBIS original', '', 'Sold or cancelled - £', '')] ) headlines = [ @@ -4261,7 +4272,7 @@ def forecast_remaining_sales(loader): "ECO4 - November"): headline_eco4_original_remaining_revenue }, { - ("", "", "", "HA Name"): "ECO4 Sold since November - £", + ("", "", "", "HA Name"): "ECO4 Sold or cancelled since November - £", ( "", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_sold_since_november @@ -4290,7 +4301,7 @@ def forecast_remaining_sales(loader): "ECO4 - November"): headline_gbis_original_remaining_revenue }, { - ("", "", "", "HA Name"): "GBIS Sold since November - £", + ("", "", "", "HA Name"): "GBIS Sold or cancelled since November - £", ( "", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_sold_since_november @@ -4399,21 +4410,18 @@ def app(): rebuild_inputs = False # List all of the data in the folder - directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir() for file in entry.iterdir() if file.suffix == '.xlsx'] # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" # Add in: - # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48", + "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48", "HA50", "HA107", ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], 28 [DONE], 41 [DONE], 50 [DONE], - # 48 [WIP], + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], # Consider for ECO4: 2, 63, 12, 13, 136, 117 # COnsider for GBIS: 56, 35, 34 # Ignore for now: From 19850f924445035e3880eaae40f750d21fb12b80 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 21:34:46 +0000 Subject: [PATCH 103/248] fixing up ha63 eco3 list --- .../ha_15_32/ha_analysis_batch_3.py | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 74c6d3f5..aebf0506 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -289,6 +289,10 @@ class DataLoader: asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Post Code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() + elif ha_name == "HA63": + asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["POSTCODE"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip() elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ @@ -1551,6 +1555,16 @@ class DataLoader: def correct_ha41_survey_list(survey_list): return survey_list + @staticmethod + def correct_ha63_survey_list(survey_list): + # Drop some filler rows + survey_list = survey_list[ + ~survey_list[survey_list.columns[0]].isin( + ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"] + ) + ] + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -1714,6 +1728,26 @@ class DataLoader: def correct_ha41_eco3_list(eco3_list): return eco3_list + @staticmethod + def correct_ha63_eco3_list(eco3_list): + eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])] + # Some postcode that aren't in the asset list + eco3_list = eco3_list[ + ~eco3_list["Post Code"].isin( + ["NR32 15X", "NR30 2BT"] + ) + ] + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "POUND COTTAGES - BLOOMSBERRY CLOSE", "POUND COTTAGES" + ) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "FREDRICK ROAD", "Frederick Road" + ) + + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -1799,12 +1833,15 @@ class DataLoader: # We verify the missed # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2 # where many surveys were conducted on house numbers, not in the asset list + # 154 missed, 2827 matched for HA 25 if len(missed) != self.UNMATCHED_ECO3[ha_name]: raise ValueError( f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) - # 154 missed, 2827 matched for HA 25 + # 41 + missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] + missed_df.head(1)["Street / Block Name"] matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on @@ -4418,11 +4455,12 @@ def app(): # Add in: priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48", - "HA50", "HA107", + "HA50", "HA63", "HA107", ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], - # Consider for ECO4: 2, 63, 12, 13, 136, 117 + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE] + # 63 [WIP] + # Consider for ECO4: 12, 13, 136, 117 # COnsider for GBIS: 56, 35, 34 # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in From 47b97fce0a6eec4fe15a967f1721e18908bffccf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 21:46:44 +0000 Subject: [PATCH 104/248] fixing eco3 matching for ha63 --- .../ha_15_32/ha_analysis_batch_3.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index aebf0506..bab5cdab 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -174,6 +174,7 @@ class DataLoader: "HA16": 7, "HA24": 12, "HA50": 4, + "HA63": 15, "HA107": 51, "HA48": 0 } @@ -182,6 +183,7 @@ class DataLoader: "HA25": 154, "HA41": 26, "HA50": 5, + "HA63": 0 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -1746,6 +1748,25 @@ class DataLoader: "FREDRICK ROAD", "Frederick Road" ) + # For denmark street, remove the space from the house number + eco3_list["NO "] = np.where( + eco3_list["Street / Block Name"] == "DENMARK STREET", + eco3_list["NO "].str.replace(" ", ""), + eco3_list["NO "] + ) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "OLD HOSPITAL MEWS HOSPITAL WALK", "Old Hospital Mews" + ) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "Portland House, Portland Street", "Portland House" + ) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "MIDDLE MARKET STREET", "Middle Market Road" + ) + return eco3_list def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): @@ -1791,7 +1812,7 @@ class DataLoader: if isinstance(house_number, str): house_number = house_number.lower().strip() - if not any(df["matching_address"].str.contains(str(house_number))): + if not any(df["HouseNo"].str.contains(str(house_number))): if "flat" in str(house_number): house_number = house_number.split("flat")[1].strip() @@ -1839,10 +1860,6 @@ class DataLoader: f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) - # 41 - missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] - missed_df.head(1)["Street / Block Name"] - matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on if matching_lookup["asset_list_row_id"].duplicated().any(): From 9cd166160bfbe9a3cc89f5d43231c3c8ed5c2ede Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 21:51:16 +0000 Subject: [PATCH 105/248] sorted ha63 facts and figures --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bab5cdab..2a1a4b16 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2077,7 +2077,8 @@ class DataLoader: "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)", "eco4 (subject to archetype check)": "eco4", "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)", - "eco4 (subject to ciga)": "eco4 (subject to ciga)" + "eco4 (subject to ciga)": "eco4 (subject to ciga)", + "eco4(subject to ciga)": "eco4 (subject to ciga)", } ha_facts_and_figures = [] From 76ef60d06c8d508d4c78e1bda320902880bce96c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 22:16:05 +0000 Subject: [PATCH 106/248] done with ha12 --- .../ha_15_32/ha_analysis_batch_3.py | 58 ++++++++++++++----- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 2a1a4b16..4dbf326b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -148,6 +148,10 @@ class DataLoader: "address": "propertyaddress", "postcode": "address" # The 'address' column actually contains postcode }, + "HA12": { + "address": "Full Address", + "postcode": "Postcode" + }, "HA16": { "address": "Address", "postcode": "Postcode" @@ -169,6 +173,7 @@ class DataLoader: UNMATCHED_CIGA = { "HA2": 0, "HA6": 117, + "HA12": 6, "HA14": 3, "HA15": 3, "HA16": 7, @@ -198,7 +203,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA48"]: + if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA48"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -1558,13 +1563,39 @@ class DataLoader: return survey_list @staticmethod - def correct_ha63_survey_list(survey_list): - # Drop some filler rows - survey_list = survey_list[ - ~survey_list[survey_list.columns[0]].isin( - ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"] - ) - ] + def correct_ha12_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Henstone Road", "Hanstone Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Lindern avenue", "Linden Avenue" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "priness way", "Princess Way" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Worth Crecesent", "Worth Crescent" + ) + + survey_list["Post Code"] = survey_list["Post Code"].str.replace( + "DY117HA", "DY11 7HA" + ) + + survey_list["Post Code"] = survey_list["Post Code"].str.replace( + "DY117HF", "DY11 7HF" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Adderbrook Crescent", "Addenbrooke Crescent" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Kinver Road", "Kinver Avenue" + ) + return survey_list @staticmethod @@ -2079,6 +2110,7 @@ class DataLoader: "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)", "eco4 (subject to ciga)": "eco4 (subject to ciga)", "eco4(subject to ciga)": "eco4 (subject to ciga)", + "eco4 subject to ciga": "eco4 (subject to ciga)", } ha_facts_and_figures = [] @@ -4472,13 +4504,13 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48", - "HA50", "HA63", "HA107", + "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", + "HA48", "HA50", "HA63", "HA107", ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE] - # 63 [WIP] - # Consider for ECO4: 12, 13, 136, 117 + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE] + # + # Consider for ECO4: 13, 136, 117 # COnsider for GBIS: 56, 35, 34 # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in From e3f36fc881925fd845f623d469d0faf9cd6b89c3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 8 Mar 2024 18:52:32 +0000 Subject: [PATCH 107/248] HA117 data load --- .../ha_15_32/ha_analysis_batch_3.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 4dbf326b..d4de589a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -188,7 +188,8 @@ class DataLoader: "HA25": 154, "HA41": 26, "HA50": 5, - "HA63": 0 + "HA63": 0, + "HA117": 4 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -308,6 +309,11 @@ class DataLoader: asset_list["District"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA117": + asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["PostCode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip() else: raise NotImplementedError("implement me") @@ -1800,6 +1806,17 @@ class DataLoader: return eco3_list + @staticmethod + def correct_ha117_eco3_list(eco3_list): + # Delete rows where postcode is null - there are some placeholder rows where this happens + eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])] + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "TARRING ROAD", "155 TARRING ROAD" + ) + + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -4505,13 +4522,13 @@ def app(): # Add in: priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", - "HA48", "HA50", "HA63", "HA107", + "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE] - # - # Consider for ECO4: 13, 136, 117 - # COnsider for GBIS: 56, 35, 34 + # 117 [WIP] + # Consider for ECO4: 13 + # Consider for GBIS: 56, 35, 34 # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in # Filter down the directories to only the priority HAs From 15efd02b8b8220f1d6cc745cb1b4a571be808643 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 8 Mar 2024 19:14:35 +0000 Subject: [PATCH 108/248] done ha117, ha13 next --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index d4de589a..97ac96da 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2119,15 +2119,19 @@ class DataLoader: "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS", } + # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we + # treat these as similar to subject to CIGA, and therefore unconfirmed worked that could fail. There + # are only a small volume of properties for which we see this eco_eligibility_map = { "not eligble": "not eligible", "eco 4(subject to ciga)": "eco4 (subject to ciga)", "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)", - "eco4 (subject to archetype check)": "eco4", + "eco4 (subject to archetype check)": "eco4 (subject to ciga)", "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)", "eco4 (subject to ciga)": "eco4 (subject to ciga)", "eco4(subject to ciga)": "eco4 (subject to ciga)", "eco4 subject to ciga": "eco4 (subject to ciga)", + "eco4 (subject to archetype)": "eco4 (subject to ciga)", } ha_facts_and_figures = [] @@ -4525,9 +4529,9 @@ def app(): "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE] - # 117 [WIP] - # Consider for ECO4: 13 + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE] + # 13 [WIP] + # Consider for ECO4: # Consider for GBIS: 56, 35, 34 # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in From b2b8fd8f84321f369cc3d14b009515759a2eff9a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 8 Mar 2024 19:20:38 +0000 Subject: [PATCH 109/248] ha13 49% matched --- .../ha_15_32/ha_analysis_batch_3.py | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 97ac96da..3edc1490 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -224,6 +224,12 @@ class DataLoader: asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA13": + asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA14": # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ @@ -1604,6 +1610,19 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha13_survey_list(survey_list): + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Woodfarm Road", "WOOD FARM ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ALLANDALE ROAD", "ALLANDALE" + ) + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -4525,8 +4544,8 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", - "HA48", "HA50", "HA63", "HA107", "HA117" + "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", + "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE] From 21117f3e585be18d5da6e49744353f7ed830a483 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 8 Mar 2024 19:32:42 +0000 Subject: [PATCH 110/248] worked through ha13 matching - need to do facts and figures --- .../ha_15_32/ha_analysis_batch_3.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3edc1490..15a4f438 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -174,6 +174,7 @@ class DataLoader: "HA2": 0, "HA6": 117, "HA12": 6, + "HA13": 119, "HA14": 3, "HA15": 3, "HA16": 7, @@ -1621,6 +1622,30 @@ class DataLoader: "ALLANDALE ROAD", "ALLANDALE" ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "NEWFIELDS LANE", "NEWFIELD LANE" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BROADFIELDS ROAD", "BROADFIELD ROAD" + ) + + survey_list["Post Code"] = survey_list["Post Code"].str.replace( + "HP2 5SF+", "HP2 5SF" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "PESCOTT HILL", "PESCOT HILL" + ) + + # This is a duplicate record + survey_list = survey_list[ + ~((survey_list["NO."] == 33) & + (survey_list["Street / Block Name"] == "Turners Hill") & + (survey_list["Post Code"] == "HP2 4LH") & + (survey_list["INSTALLED OR CANCELLED"] == "NO UPDATE - CHECKED 18.12.23")) + ] + return survey_list @staticmethod @@ -1652,6 +1677,9 @@ class DataLoader: postcode.lower() not in asset_list["matching_postcode"].values ] + if ha_name == "HA13": + missed_postcodes = ["hp17 8le"] + matching_lookup = [] for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)): From f03485d4f49045e8f68cf7a8dcc5caf58113ede1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 14:41:38 +0000 Subject: [PATCH 111/248] updating facts and figures to treat archetype dependent properties separately --- .../ha_15_32/ha_analysis_batch_3.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 15a4f438..c0f3ab12 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2172,13 +2172,12 @@ class DataLoader: eco_eligibility_map = { "not eligble": "not eligible", "eco 4(subject to ciga)": "eco4 (subject to ciga)", - "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)", - "eco4 (subject to archetype check)": "eco4 (subject to ciga)", - "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)", + "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga) (subject to archetype)", + "eco4 (subject to archetype check)": "eco4 (subject to archetype)", + "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)", "eco4 (subject to ciga)": "eco4 (subject to ciga)", "eco4(subject to ciga)": "eco4 (subject to ciga)", "eco4 subject to ciga": "eco4 (subject to ciga)", - "eco4 (subject to archetype)": "eco4 (subject to ciga)", } ha_facts_and_figures = [] @@ -2330,7 +2329,7 @@ class DataLoader: asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id") # Update the cases where properties have sold, but are missing a CIGA check asset_list["ECO Eligibility"] = np.where( - (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & ( + (asset_list["ECO Eligibility"].str.contains("(subject to ciga)")) & ( asset_list["has_a_survey_record"] == True ), "eco4 - passed ciga", @@ -2349,7 +2348,14 @@ class DataLoader: # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS asset_list["ECO Eligibility"] = np.where( (asset_list["ECO Eligibility"].isin( - ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + [ + "eco4", + "eco4 (subject to ciga)", + "eco4 - passed ciga", + "failed ciga", + "eco4 (subject to archetype)", + "eco4 (subject to ciga) (subject to archetype)" + ] )) & ( asset_list["installation_status"].isin( ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"] From c1a15052f246288c5216e2c80849ccef3b2c6be0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 14:46:26 +0000 Subject: [PATCH 112/248] Handling warning for regex searching of (subject to ciga) --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index c0f3ab12..430e5ff7 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2329,7 +2329,7 @@ class DataLoader: asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id") # Update the cases where properties have sold, but are missing a CIGA check asset_list["ECO Eligibility"] = np.where( - (asset_list["ECO Eligibility"].str.contains("(subject to ciga)")) & ( + (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & ( asset_list["has_a_survey_record"] == True ), "eco4 - passed ciga", From b46da0f6c0140b28d00385f02f29cae91f412b2d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 15:48:51 +0000 Subject: [PATCH 113/248] adding in archetype check process to model --- .../ha_15_32/ha_analysis_batch_3.py | 99 +++++++++++++++---- 1 file changed, 82 insertions(+), 17 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 430e5ff7..9a959956 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3658,19 +3658,47 @@ def patch_cleaned(cleaned): def calculate_eco4_post_ciga( eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate, - eco4_rate + eco4_rate, archetype_conversion_rate ): remaining_needing_ciga_check = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)" + eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") & + ~eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype") ]["count"].sum() + remaining_needing_ciga_and_archetype_check = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") & + eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype") + ]["count"].sum() + # We scale this down by the archetype_conversion_rate, and add this on to the remaining_needing_ciga_check + remaining_needing_ciga_and_archetype_check_passed = np.round( + remaining_needing_ciga_and_archetype_check * archetype_conversion_rate + ) + + remaining_needing_ciga_check += remaining_needing_ciga_and_archetype_check_passed + + eco4_no_ciga_needed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4" + ]["count"].sum() + + eco4_no_ciga_archetype_needed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4 (subject to archetype)" + ]["count"].sum() + eco4_no_ciga_archetype_needed_passed = np.round( + eco4_no_ciga_archetype_needed * archetype_conversion_rate + ) + + eco4_no_ciga_needed += eco4_no_ciga_archetype_needed_passed + + failed_archetype_check = int( + remaining_needing_ciga_and_archetype_check + + eco4_no_ciga_archetype_needed - + remaining_needing_ciga_and_archetype_check_passed - + eco4_no_ciga_archetype_needed_passed + ) + has_ciga_check = not input_data["ciga_list"].empty if has_ciga_check: - eco4_no_ciga_needed = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4" - ]["count"].sum() - eco4_ciga_passed = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga" ]["count"].sum() @@ -3681,8 +3709,10 @@ def calculate_eco4_post_ciga( eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed - eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) - eco4_confirmed = np.round(eco4_confirmed) + eco4_confirmed = np.round( + (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) + ) eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed) @@ -3704,9 +3734,7 @@ def calculate_eco4_post_ciga( eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations else: - eco4_no_ciga_needed = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4" - ]["count"].sum() + eco4_confirmed_ciga_failures = 0 # Multiply by sale conversion eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate) @@ -3735,6 +3763,9 @@ def calculate_eco4_post_ciga( "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate, "Of which confirmed - £": eco4_confirmed * eco4_rate, "Of which forecast - £": eco4_remaining_forecast * eco4_rate, + # Archetype check failures + "Estimated total - failed archetype check - #": failed_archetype_check, + "Estimated total - failed archetype check - £": failed_archetype_check * eco4_rate, # Ciga failures "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures), "Confirmed CIGA failures": eco4_confirmed_ciga_failures, @@ -3766,6 +3797,14 @@ def forecast_remaining_sales(loader): gbis_rate = 600 eco4_rate = 1710 + # Based on ONS https://www.ons.gov.uk/peoplepopulationandcommunity/housing/bulletins/housingenglandandwales + # /census2021 + # there are 5.7 million terraced properties in the UK, of the 19.3 million houses or bungalows. We therefore apply + # a 30% discount to homes that are dependent on an archetype check, since around 30% of them will be mid terraced + # This 30% is slightly harsh but we be conservative + # Therefore, the archetype check conversion rate is 70% + archetype_conversion_rate = 0.7 + # 1) Calculate the conversion rate from passed CIGA to actual sale converted_ciga_jobs = [] for ha_name, input_data in loader.data.items(): @@ -4010,13 +4049,27 @@ def forecast_remaining_sales(loader): eco4_pre_ciga = eligiblity_counts[ eligiblity_counts["ECO Eligibility"].isin( - ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + [ + "eco4", + "eco4 (subject to ciga)", + "eco4 - passed ciga", + "failed ciga", + "eco4 (subject to ciga) (subject to archetype)", + "eco4 (subject to archetype)" + ] ) ]["count"].sum() eco4_pre_ciga_remaining = eligiblity_counts_remaining[ eligiblity_counts_remaining["ECO Eligibility"].isin( - ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + [ + "eco4", + "eco4 (subject to ciga)", + "eco4 - passed ciga", + "failed ciga", + "eco4 (subject to ciga) (subject to archetype)", + "eco4 (subject to archetype)" + ] ) ]["count"].sum() @@ -4065,7 +4118,8 @@ def forecast_remaining_sales(loader): ha_ciga_conversion_rate=ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate=ha_eco4_to_sale_rate, - eco4_rate=eco4_rate + eco4_rate=eco4_rate, + archetype_conversion_rate=archetype_conversion_rate ) eco4_post_ciga_remaining_results = calculate_eco4_post_ciga( @@ -4074,7 +4128,8 @@ def forecast_remaining_sales(loader): ha_ciga_conversion_rate=ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate=ha_eco4_to_sale_rate, - eco4_rate=eco4_rate + eco4_rate=eco4_rate, + archetype_conversion_rate=archetype_conversion_rate ) # Calculate the delta compared to Warmfront's original remaining @@ -4111,6 +4166,8 @@ def forecast_remaining_sales(loader): gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion)) gbis_remaining_revenue = int(gbis_remaining * gbis_rate) + survey_list["installation_status"].value_counts() + # GBIS delta if original_warmfront_remaining_gbis == 0: gbis_delta_vs_original_estimate_remaining = "N/A" @@ -4176,7 +4233,7 @@ def forecast_remaining_sales(loader): surveys_with_eligibility["installation_status"] == "GBIS - cancelled" ].shape[0] - expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion + expected_gbis_unconfirmed_sales = np.round(incomplete_gbis_sales * ha_gbis_sale_conversion) gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales) @@ -4187,10 +4244,12 @@ def forecast_remaining_sales(loader): # Add in the variance: # We should expect that the pre-ciga total is: # 1) The number of post CIGA successes + + # 2) The number of archetype failures + # 2) the number of CIGA failures + # 3) The number of cancellations variance_total = eco4_pre_ciga - ( eco4_post_ciga_total_results["ECO4 - post CIGA - #"] + + eco4_post_ciga_total_results["Estimated total - failed archetype check - #"] + eco4_post_ciga_total_results['Estimated total - failed CIGA'] + eco4_post_ciga_total_results["Expected cancellations - #"] ) @@ -4199,6 +4258,7 @@ def forecast_remaining_sales(loader): variance_remaining = eco4_pre_ciga_remaining - ( eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] + + eco4_post_ciga_remaining_results["Estimated total - failed archetype check - #"] + eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] + eco4_post_ciga_remaining_results["Expected cancellations - #"] ) @@ -4290,6 +4350,11 @@ def forecast_remaining_sales(loader): ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[ "Expected cancellations - £" ], + # Archetype check failures + ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - #", ""): + eco4_post_ciga_remaining_results['Estimated total - failed archetype check - #'], + ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - £", ""): + eco4_post_ciga_remaining_results['Estimated total - failed archetype check - £'], # CIGA failures ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[ 'Estimated total - failed CIGA' @@ -4324,7 +4389,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 47: + if len(to_append) != 49: raise ValueError("Something went wrong") results.append(to_append) From a7e593ecd9289551d7ef47481ea3dff0c2a70592 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 16:15:16 +0000 Subject: [PATCH 114/248] Added handling of archetype checks and corrected gbis calculations --- .../ha_15_32/ha_analysis_batch_3.py | 65 ++++++++++++++----- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 9a959956..aca2ce43 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -4154,19 +4154,25 @@ def forecast_remaining_sales(loader): else: ha_gbis_sale_conversion = median_gbis_to_install - gbis_total = eligiblity_counts[ + gbis_total_pre_cancellations = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "gbis" ]["count"].sum() - gbis_total = int(np.round(gbis_total * ha_gbis_sale_conversion)) - gbis_total_revenue = int(gbis_total * gbis_rate) - gbis_remaining = eligiblity_counts_remaining[ + gbis_total_pre_cancellations_revenue = gbis_total_pre_cancellations * gbis_rate + # gbis_total = int(np.round(gbis_total_pre_cancellations * ha_gbis_sale_conversion)) + # gbis_total_revenue = int(gbis_total * gbis_rate) + + gbis_remaining_pre_cancellations = eligiblity_counts_remaining[ eligiblity_counts_remaining["ECO Eligibility"] == "gbis" ]["count"].sum() - gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion)) + gbis_remaining_pre_cancellations_revenue = ( + gbis_remaining_pre_cancellations * gbis_rate + ) + # This is the gbis jobs we expect to sell + gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion)) gbis_remaining_revenue = int(gbis_remaining * gbis_rate) - - survey_list["installation_status"].value_counts() + # This is the number we expect to cancel + gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) * gbis_rate # GBIS delta if original_warmfront_remaining_gbis == 0: @@ -4179,9 +4185,10 @@ def forecast_remaining_sales(loader): # Current sales figures # For any sales surveys that are complete, that could still cancel, we apply a conversion rate eco4_actually_sold = 0 - gbis_actually_sold = 0 eco4_confirmed_cancellations = 0 eco4_expected_cancellations = 0 + + gbis_actually_sold = 0 gbis_confirmed_cancellations = 0 gbis_expected_cancellations = 0 if not survey_list.empty: @@ -4284,17 +4291,30 @@ def forecast_remaining_sales(loader): raise ValueError("Something went wrong in pre_ciga_eco4_variance") # Check GBIS total variance - gbis_variance = ( - gbis_total_revenue - - gbis_actually_sold - - gbis_confirmed_cancellations * gbis_rate - - gbis_expected_cancellations * gbis_rate - - gbis_remaining_revenue + # The total before cancellations should equal: + # The number of sold + + # The number of confirmed cancelled + + # The number of expected cancelled + + # The number of remaining + gbis_variance = gbis_total_pre_cancellations - ( + gbis_actually_sold / gbis_rate + + gbis_confirmed_cancellations + + gbis_expected_cancellations + + gbis_remaining_pre_cancellations ) if gbis_variance != 0: raise ValueError("Something went wrong in gbis_variance") + # We expect the remaining to equal expected sales + expected cancellations + gbis_variance_2 = gbis_remaining_pre_cancellations - ( + gbis_remaining + + gbis_remaining_expected_cancellations + ) + + if gbis_variance_2 != 0: + raise ValueError("Something went wrong in gbis_variance") + to_append = { ("", "", "", "HA Name"): ha_name, # ECO4 - original warmfront figures @@ -4375,17 +4395,26 @@ def forecast_remaining_sales(loader): "Estimated CIGA failures - £" ], # GBIS postcode list - ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, - ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, + ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total_pre_cancellations, + ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): + gbis_total_pre_cancellations_revenue, ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance, ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold, ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate, # This is for jobs that are in-progress and could still cancel ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate, - ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, - ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, + ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): + gbis_remaining_pre_cancellations, + ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): + gbis_remaining_pre_cancellations_revenue, ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""): gbis_delta_vs_original_estimate_remaining, + # Expected cancellations + ( + "GBIS Postcode list", "Of which expected sales - £", "Remaining - £", + "GBIS total"): gbis_remaining_revenue, + ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"): + gbis_remaining_expected_cancellations } # Make sure nothing is forgotten due to duplicate multi-index keys From f9957a55d066a294e79efdf196b72e79d82689fb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 16:19:54 +0000 Subject: [PATCH 115/248] fixed bug in gbis variance 2? --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index aca2ce43..a25f98c6 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -4172,7 +4172,8 @@ def forecast_remaining_sales(loader): gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion)) gbis_remaining_revenue = int(gbis_remaining * gbis_rate) # This is the number we expect to cancel - gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) * gbis_rate + gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) + gbis_remaining_expected_cancellations_revenue = gbis_remaining_expected_cancellations * gbis_rate # GBIS delta if original_warmfront_remaining_gbis == 0: @@ -4313,7 +4314,7 @@ def forecast_remaining_sales(loader): ) if gbis_variance_2 != 0: - raise ValueError("Something went wrong in gbis_variance") + raise ValueError("Something went wrong in gbis_variance2") to_append = { ("", "", "", "HA Name"): ha_name, @@ -4414,7 +4415,7 @@ def forecast_remaining_sales(loader): "GBIS Postcode list", "Of which expected sales - £", "Remaining - £", "GBIS total"): gbis_remaining_revenue, ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"): - gbis_remaining_expected_cancellations + gbis_remaining_expected_cancellations_revenue } # Make sure nothing is forgotten due to duplicate multi-index keys From 1ccb2cdebdca9a2fc17f0b11ef431bac81309357 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 16:22:28 +0000 Subject: [PATCH 116/248] updated number of expected to append --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a25f98c6..7ddc9844 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -4419,7 +4419,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 49: + if len(to_append) != 51: raise ValueError("Something went wrong") results.append(to_append) From 768a0385e3a2cf7fc29b86b827cfb43d914e4621 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 17:02:33 +0000 Subject: [PATCH 117/248] ha35 data read --- .../ha_15_32/ha_analysis_batch_3.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7ddc9844..ea0078c2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -276,6 +276,13 @@ class DataLoader: asset_list["POST CODE"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip() + elif ha_name == "HA35": + asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Post Code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip() elif ha_name == "HA38": asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -1648,6 +1655,13 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha35_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BALLADIER WLAK", "BALLADIER WALK" + ) + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -4673,14 +4687,14 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", - "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" + "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35", + "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE] - # 13 [WIP] + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE] + # 35 [WIP] # Consider for ECO4: - # Consider for GBIS: 56, 35, 34 + # Consider for GBIS: 56, 34 # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in # Filter down the directories to only the priority HAs From 29f2a2abf801e4c01ad89383b18eaac4ed97b0af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 17:09:43 +0000 Subject: [PATCH 118/248] HA35 done --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ea0078c2..04ee343c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -4691,8 +4691,9 @@ def app(): "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come - # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE] - # 35 [WIP] + # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], + # 35 [DONE] + # 34 [WIP] # Consider for ECO4: # Consider for GBIS: 56, 34 # Ignore for now: From 6e4fc23ecc2036e14148b18611cb04aafde8084b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 18:12:12 +0000 Subject: [PATCH 119/248] fixed dupes for HA34 --- .../ha_15_32/ha_analysis_batch_3.py | 104 +++++++++++++++++- 1 file changed, 98 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 04ee343c..8784481b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -276,6 +276,12 @@ class DataLoader: asset_list["POST CODE"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip() + elif ha_name == "HA34": + asset_list["matching_address"] = ( + asset_list[" Address"].astype(str).str.lower().str.strip() + ", " + + asset_list[" Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA35": asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \ @@ -566,7 +572,8 @@ class DataLoader: eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))] # Perform the eco3 merge - eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name) + if not eco3_list.empty: + eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name) if ha_name in ["HA25"]: # Accomodate ha25 unique structure @@ -1657,9 +1664,94 @@ class DataLoader: @staticmethod def correct_ha35_survey_list(survey_list): - survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( - "BALLADIER WLAK", "BALLADIER WALK" + return survey_list + + @staticmethod + def correct_ha34_survey_list(survey_list): + # Note in the asset list + survey_list = survey_list[ + survey_list["Post Code"] != "L5 3SS" + ] + + survey_list["Post Code"] = survey_list["Post Code"].str.replace( + "L177DR", "L17 7DR" ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "PENVALLEY CRESENT", "Penvalley Crescent" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "PENLINKEN DRIVE", "Penlinken Drive" + ) + + # There's no 32 Penlinken Drive in the asset sheet + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Penlinken Drive") & + (survey_list["NO."] == 32)) + ] + + # There's no 30 Gwent Street in the asset sheet + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "GWENT ST") & + (survey_list["NO."] == 30)) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "POULTON RD", "Poulton Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ST PAULS RD", "St Pauls Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BROAD LANE, KIRKBY", "BROAD LANE" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BULLENS RD, KIRKBY", "Bullens Road" + ) + + # There's no 219 NORTH HILL ST in the asset sheet + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "NORTH HILL ST") & + (survey_list["NO."] == 219)) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "CROSLAND RD, KIRKBY", "CROSLAND ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "PARK BROW DRIVE, KIRKBY", "Park Brow Drive" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "CELTIC TREET", "Celtic Street" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BUCKLAND ROAD", "Buckland Street" + ) + + # duplicates + survey_list = survey_list.drop_duplicates(["Street / Block Name", "NO.", "Post Code"]) + + # This is a duplicate with wrong postcode + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "CLARIBEL STREET") & + (survey_list["NO."] == 7) & + (survey_list["Post Code"] == "L8 8AF")) + ] + + survey_list["NO."] = np.where( + ((survey_list["NO."] == "187 A") & + (survey_list["Post Code"] == "L32 6QF")), + "187A", + survey_list["NO."] + ) + return survey_list @staticmethod @@ -1685,7 +1777,7 @@ class DataLoader: survey_list = survey_list_correction_function(survey_list) missed_postcodes = [] - if ha_name == "HA6": + if ha_name in ["HA6", "HA34"]: missed_postcodes = [ postcode.lower() for postcode in survey_list["Post Code"] if postcode.lower() not in asset_list["matching_postcode"].values @@ -4687,8 +4779,8 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35", - "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" + "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34", + "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], From 27fed2dce320a54a049df279fca5c3abd407275f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 9 Mar 2024 18:25:22 +0000 Subject: [PATCH 120/248] temp removed HA34 due to issue --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 8784481b..d1f8d546 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2270,6 +2270,11 @@ class DataLoader: "ECO4 AFFORDABLE WARMTH": "ECO4", "Affordable Warmth": "ECO4", "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS", + "ECO4 PPS": "ECO4", + "AFFORDABLE WARMTH / REMEDIAL": "ECO4", + "AFF0RDALE WARMTH": "ECO4", + "ECO 4 RdSAP CL": "ECO4", + "Affordable Warmth (R) ": "ECO4" } # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we @@ -4779,15 +4784,17 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34", + "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", + # "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], # 35 [DONE] - # 34 [WIP] + # [WIP] # Consider for ECO4: - # Consider for GBIS: 56, 34 + # Consider for GBIS: 56 + # 34 [bug in the results so leaving out for the moment] # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in # Filter down the directories to only the priority HAs From 28434f43c8fd9dac176fd68a1b4e20a79a128e9d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 13:55:44 +0000 Subject: [PATCH 121/248] ha56 wip --- .../ha_15_32/ha_analysis_batch_3.py | 90 +++++++++++++++++-- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index d1f8d546..064ff8f5 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -317,6 +317,12 @@ class DataLoader: asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Post Code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() + elif ha_name == "HA56": + asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Post Code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() elif ha_name == "HA63": asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["POSTCODE"].astype(str).str.lower().str.strip() @@ -639,6 +645,54 @@ class DataLoader: return asset_list + @staticmethod + def correct_ha56_asset_list(asset_list): + # CH1 4JR has already been surveyed, but it's listed in the asset list + # as a single row, when it's actually 32 units, so we just set this + # as ineligible + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "CH1 4JR", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + # Same for CW8 3EU + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "CW8 3EU", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "CW1 3HP", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "WA4 2PH", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "BD6 1QJ", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "L39 1RS", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "WA10 2DE", + "Not eligible", + asset_list["ECO Eligibility"] + ) + @staticmethod def correct_ha14_asset_list(asset_list): @@ -1970,6 +2024,24 @@ class DataLoader: return eco3_list + @staticmethod + def correct_ha56_eco3_list(eco3_list): + eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])] + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "Mount Pleasant, Crewe", "Mount Pleasant" + ) + + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "Dutton Close", "Dutton Way" + ) + + eco3_list["Post Code"] = eco3_list["Post Code"].str.replace( + "Ls63nl", "LS6 3NL" + ) + + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -1978,8 +2050,8 @@ class DataLoader: asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower() eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "") - if ha_name == "HA25": - # 317 -> 259 + if ha_name in ["HA25", "HA56"]: + # HA25: 317 -> 259 missed_postcodes = { postcode for postcode in eco3_list["postcode_no_space"] if postcode not in asset_list["matching_postcode_nospace"].values @@ -2060,6 +2132,7 @@ class DataLoader: raise ValueError( f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) + missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on @@ -3896,6 +3969,9 @@ def calculate_eco4_post_ciga( def forecast_remaining_sales(loader): + # TODO: Skip HA34 for the moment + loader.data = {k: v for k, v in loader.data.items() if k != "HA34"} + # Assumptions: # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate # and I don't want the numbers to change too much, depenent on the CIGA conversation rate @@ -4523,9 +4599,9 @@ def forecast_remaining_sales(loader): gbis_delta_vs_original_estimate_remaining, # Expected cancellations ( - "GBIS Postcode list", "Of which expected sales - £", "Remaining - £", + "GBIS Postcode list", "", "Of which expected sales - £ - £", "GBIS total"): gbis_remaining_revenue, - ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"): + ("GBIS Postcode list", "", "Of which expected cancellations -£", "GBIS total"): gbis_remaining_expected_cancellations_revenue } @@ -4786,14 +4862,14 @@ def app(): priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", # "HA34", - "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" + "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], # 35 [DONE] - # [WIP] + # 56 [WIP] # Consider for ECO4: - # Consider for GBIS: 56 + # Consider for GBIS: # 34 [bug in the results so leaving out for the moment] # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in From db7b6de87bfb13486a179cbdc547ae375cfc0c8d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 14:13:20 +0000 Subject: [PATCH 122/248] handle HA56 dupes --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 064ff8f5..62099386 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -189,6 +189,7 @@ class DataLoader: "HA25": 154, "HA41": 26, "HA50": 5, + "HA56": 320, "HA63": 0, "HA117": 4 } @@ -693,6 +694,8 @@ class DataLoader: asset_list["ECO Eligibility"] ) + return asset_list + @staticmethod def correct_ha14_asset_list(asset_list): @@ -2040,6 +2043,14 @@ class DataLoader: "Ls63nl", "LS6 3NL" ) + # Handle a duplicate + eco3_list = eco3_list[ + ~((eco3_list["Street / Block Name"] == "Mount Pleasant") & + (eco3_list["Post Code"] == "CW1 3JF") & + (eco3_list["NO "] == 5) & + (eco3_list["INSTALL/ CANCELLATION DATE"] == "CANCELLED 20.5.2022")) + ] + return eco3_list def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): @@ -2128,15 +2139,16 @@ class DataLoader: # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2 # where many surveys were conducted on house numbers, not in the asset list # 154 missed, 2827 matched for HA 25 + # For HA56, the number of missed is high at 320, however a big portion of these are due to the block being + # listed in the asset list, and individual units being in the survey list if len(missed) != self.UNMATCHED_ECO3[ha_name]: raise ValueError( f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) - missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on - if matching_lookup["asset_list_row_id"].duplicated().any(): + if matching_lookup["asset_list_row_id"].duplicated().sum(): raise ValueError("Duplicated asset list row ids") # Merge onto eco3 list From 8b3f4d3a520f9148195c6fbd55d3b1d7354d0ee1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 14:25:47 +0000 Subject: [PATCH 123/248] ha56 survey list matching --- .../ha_15_32/ha_analysis_batch_3.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 62099386..f9bf3856 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -694,6 +694,20 @@ class DataLoader: asset_list["ECO Eligibility"] ) + # Already surveyed under ECO4 + asset_list["ECO Eligibility"] = np.where( + asset_list["Post Code"] == "SK17 6NR", + "Not eligible", + asset_list["ECO Eligibility"] + ) + + asset_list["ECO Eligibility"] = np.where( + ~((asset_list["Post Code"] == "WA5 0EN") & + (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")), + "Not eligible", + asset_list["ECO Eligibility"] + ) + return asset_list @staticmethod @@ -1811,6 +1825,29 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha56_survey_list(survey_list): + # Not in asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Samual Street") & + (survey_list["NO."].isin([22, 24])) & + (survey_list["Post Code"] == "WA5 1BB")) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "STOURTON RD", "Stourton Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BIRKIN RD", "Birkin Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "PORTLAND RD", "Portland Road" + ) + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -1843,6 +1880,10 @@ class DataLoader: if ha_name == "HA13": missed_postcodes = ["hp17 8le"] + if ha_name == "HA56": + # Multiple properties are listed as blocks, which is a problem for matching + missed_postcodes = ["sk17 6nr", "wa5 0en"] + matching_lookup = [] for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)): @@ -1890,6 +1931,19 @@ class DataLoader: df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] if df.shape[0] != 1: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] + + if df.empty: + + postcode_lower = row["Post Code"].lower() + if postcode_lower in missed_postcodes: + matching_lookup.append( + { + "survey_list_row_id": row["survey_list_row_id"], + "asset_list_row_id": None, + } + ) + continue + if df.shape[0] != 1: if "Town/Area" not in row.keys(): full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + From 4a6711a1403a8661b467a0f7023151829e305822 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 14:35:08 +0000 Subject: [PATCH 124/248] handling ha56 dupes| --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index f9bf3856..0030af9d 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1846,6 +1846,13 @@ class DataLoader: "PORTLAND RD", "Portland Road" ) + # We remove a row, because two rows match to a block listing + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Tavlin Avenue") & + (survey_list["NO."] == 17) & + (survey_list["Post Code"] == "WA5 0EN")) + ] + return survey_list @staticmethod From ba65b6c8e37e5a44492c3342a05513d05d275ac4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 14:39:15 +0000 Subject: [PATCH 125/248] fixed bug in asset list cleaning --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 0030af9d..b1eda326 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -702,8 +702,8 @@ class DataLoader: ) asset_list["ECO Eligibility"] = np.where( - ~((asset_list["Post Code"] == "WA5 0EN") & - (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")), + ((asset_list["Post Code"] == "WA5 0EN") & + (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")), "Not eligible", asset_list["ECO Eligibility"] ) From 5eb938bf54fbaaf52bb72e7c8972bad5e2d58a46 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 15:40:02 +0000 Subject: [PATCH 126/248] ha18 done --- .../ha_15_32/ha_analysis_batch_3.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index b1eda326..676bd613 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -249,6 +249,20 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA18": + asset_list["matching_address"] = ( + asset_list["Address"].astype(str).str.lower().str.strip() + ", " + + asset_list["Post Code"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() + elif ha_name == "HA19": + asset_list["matching_address"] = ( + asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + + asset_list["Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA25": asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] @@ -495,6 +509,8 @@ class DataLoader: return "CIGA checks" elif "CIGA check" in workbook.sheetnames: return "CIGA check" + elif "CIGA Check" in workbook.sheetnames: + return "CIGA Check" elif "CIGA requested" in workbook.sheetnames: return "CIGA requested" else: @@ -1733,6 +1749,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha18_survey_list(survey_list): + return survey_list + @staticmethod def correct_ha35_survey_list(survey_list): return survey_list @@ -2435,6 +2455,7 @@ class DataLoader: "eco4 (subject to ciga)": "eco4 (subject to ciga)", "eco4(subject to ciga)": "eco4 (subject to ciga)", "eco4 subject to ciga": "eco4 (subject to ciga)", + "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)", } ha_facts_and_figures = [] @@ -4933,14 +4954,15 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", + "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", + "HA19", "HA24", "HA25", "HA28", "HA32", # "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], - # 35 [DONE] - # 56 [WIP] + # 35 [DONE], 56 [DONE], 19 [DONE] + # # Consider for ECO4: # Consider for GBIS: # 34 [bug in the results so leaving out for the moment] From 5b39cf138df458b749d13fd100de011e6f3ac350 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 15:52:33 +0000 Subject: [PATCH 127/248] ha9 data load --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 676bd613..88ab706b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -173,6 +173,7 @@ class DataLoader: UNMATCHED_CIGA = { "HA2": 0, "HA6": 117, + "HA9": 0, "HA12": 6, "HA13": 119, "HA14": 3, @@ -226,6 +227,14 @@ class DataLoader: asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA9": + asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA13": asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \ @@ -430,7 +439,7 @@ class DataLoader: :return: """ - if ha_name in ["HA107"]: + if ha_name == "HA107": asset_list["HouseNo"] = asset_list["House No"].copy() elif ha_name == "HA32": asset_list["HouseNo"] = asset_list["Dwelling num"].copy() @@ -438,6 +447,8 @@ class DataLoader: asset_list["HouseNo"] = asset_list["House Number"].copy() elif ha_name == "HA38": asset_list["HouseNo"] = asset_list["House_Number"].copy() + elif ha_name == "HA9": + asset_list["HouseNo"] = asset_list["House Number"].copy() else: split_addresses = asset_list['matching_address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) @@ -4954,7 +4965,7 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", + "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", "HA28", "HA32", # "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" From efbda5cece019d8518b770c0ace444c1179a1d6a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 16:09:08 +0000 Subject: [PATCH 128/248] ha27 complete --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 88ab706b..fba30f1f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -280,6 +280,12 @@ class DataLoader: asset_list["matching_postcode"] = asset_list['matching_address'].apply( lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x ) + elif ha_name == "HA27": + asset_list["matching_address"] = ( + asset_list[" Address"].astype(str).str.lower().str.strip() + ", " + + asset_list[" Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA28": asset_list["matching_address"] = ( asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + @@ -582,7 +588,7 @@ class DataLoader: # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga # lists, and so # we can return the asset list now - if ha_name in ["HA1"]: + if ha_name in ["HA1", "HA27"]: return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be @@ -4966,13 +4972,13 @@ def app(): # Add in: priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", - "HA19", "HA24", "HA25", "HA28", "HA32", + "HA19", "HA24", "HA25", "HA27", "HA28", "HA32", # "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], - # 35 [DONE], 56 [DONE], 19 [DONE] + # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 DONE # # Consider for ECO4: # Consider for GBIS: From 22f3aca336abafc164439f00ddbdf34649f4f28a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 16:26:42 +0000 Subject: [PATCH 129/248] ha30 32% matched --- .../ha_15_32/ha_analysis_batch_3.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index fba30f1f..bdb0d0c4 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -164,6 +164,10 @@ class DataLoader: "address": "T1_Address", "postcode": "matching_postcode" }, + "HA30": { + "address": "A_Address", + "postcode": "A_Postcode" + }, "HA48": { "address": "Full Address", "postcode": "Postcode" @@ -207,7 +211,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA48"]: + if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA48"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -1892,6 +1896,27 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha30_survey_list(survey_list): + + survey_list = survey_list[~pd.isnull(survey_list["Post Code"])] + + # Split on / and take the first half + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0] + + # Not in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Horsebridge Road") & + (survey_list["NO."] == 286)) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "DUTTON WAY") & + (survey_list["NO."] == 9)) + ] + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -4972,7 +4997,7 @@ def app(): # Add in: priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", - "HA19", "HA24", "HA25", "HA27", "HA28", "HA32", + "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA32", # "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" ] From cd81c2b0b29a65b3fd3c59ec5dec7730afdd64ec Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 16:45:59 +0000 Subject: [PATCH 130/248] done ha30 matching --- .../ha_15_32/ha_analysis_batch_3.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bdb0d0c4..71062b16 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1915,6 +1915,74 @@ class DataLoader: (survey_list["NO."] == 9)) ] + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "PAYTHORNE CLOSE") & + (survey_list["NO."] == 10)) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "MARCHWOOD ROAD") & + (survey_list["NO."] == 11)) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Otterburn Close") & + (survey_list["NO."] == 4)) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Blossom Court") & + (survey_list["NO."] == 5)) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "St LUKES CLOSE , HUNTINGDON", "St. Lukes Close" + ) + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "St. Lukes Close") & + (survey_list["NO."].isin([4, 7, 8]))) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ROMAN WAY , GODMANCHESTER , HUNTINGDON", "Roman Way" + ) + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Roman Way") & + (survey_list["NO."].isin([58]))) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "HEADLANDS , FENSTANTON , HUNTINGDON", "Headlands Fenstanton" + ) + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Headlands Fenstanton") & + (survey_list["NO."].isin([126, 134]))) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "WALLACE COURT , HUNTINGDON", "Wallace Court" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "CRICKETERS WAY , CHATTERIS", "Cricketers Way" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Jubilee Gardens", "Jubilee Green" + ) + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Harrow Road") & + (survey_list["NO."].isin([10]))) + ] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ST LUKES CLOSE", "St. Lukes Close" + ) + return survey_list @staticmethod From 2810316e22ffe4662ae40c2c3bb9bee2f6af6f83 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 17:14:22 +0000 Subject: [PATCH 131/248] handled bug for HA30 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 71062b16..1ee40dde 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2566,6 +2566,7 @@ class DataLoader: "eco4(subject to ciga)": "eco4 (subject to ciga)", "eco4 subject to ciga": "eco4 (subject to ciga)", "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)", + "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)", } ha_facts_and_figures = [] @@ -2716,11 +2717,13 @@ class DataLoader: asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id") # Update the cases where properties have sold, but are missing a CIGA check + # If we don't have a CIGA list, we set the value to ECO4 + set_to = "eco4 - passed ciga" if not ciga_list.empty else "eco4" asset_list["ECO Eligibility"] = np.where( (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & ( asset_list["has_a_survey_record"] == True ), - "eco4 - passed ciga", + set_to, asset_list["ECO Eligibility"] ) # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4 @@ -4122,7 +4125,6 @@ def calculate_eco4_post_ciga( eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations else: - eco4_confirmed_ciga_failures = 0 # Multiply by sale conversion eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate) From e15b977930c1b65ab39099c8c6a92d05039e96af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 17:25:07 +0000 Subject: [PATCH 132/248] fixed ha34, completed 30 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 1ee40dde..7d35386d 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2550,7 +2550,8 @@ class DataLoader: "AFFORDABLE WARMTH / REMEDIAL": "ECO4", "AFF0RDALE WARMTH": "ECO4", "ECO 4 RdSAP CL": "ECO4", - "Affordable Warmth (R) ": "ECO4" + "Affordable Warmth (R) ": "ECO4", + "Affordable Warmth ": "ECO4" } # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we @@ -4175,9 +4176,6 @@ def calculate_eco4_post_ciga( def forecast_remaining_sales(loader): - # TODO: Skip HA34 for the moment - loader.data = {k: v for k, v in loader.data.items() if k != "HA34"} - # Assumptions: # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate # and I don't want the numbers to change too much, depenent on the CIGA conversation rate @@ -5066,18 +5064,15 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", - "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA32", - # "HA34", - "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" + "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", + "HA27", "HA28", "HA30", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], - # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 DONE + # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE] # # Consider for ECO4: # Consider for GBIS: - # 34 [bug in the results so leaving out for the moment] # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in # Filter down the directories to only the priority HAs From 41c17aa1dafe9110c74d6969f2fa06e58d3f0cf8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 18:13:45 +0000 Subject: [PATCH 133/248] HA54 done --- .../ha_15_32/ha_analysis_batch_3.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7d35386d..d556450b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -168,9 +168,17 @@ class DataLoader: "address": "A_Address", "postcode": "A_Postcode" }, + "HA31": { + "address": "A_Address", + "postcode": "matching_postcode" + }, "HA48": { "address": "Full Address", "postcode": "Postcode" + }, + "HA54": { + "address": "Postal Address", + "postcode": "matching_postcode" } } @@ -211,7 +219,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA48"]: + if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -559,6 +567,12 @@ class DataLoader: if ha_name == "HA25": asset_sheet_colnames[11] = "matching_postcode" + if ha_name == "HA31": + asset_sheet_colnames[2] = "matching_postcode" + + if ha_name == "HA54": + asset_sheet_colnames[10] = "matching_postcode" + rows_data = [] for row in asset_sheet.iter_rows(min_row=2, values_only=False): @@ -2568,6 +2582,7 @@ class DataLoader: "eco4 subject to ciga": "eco4 (subject to ciga)", "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)", "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)", + "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)" } ha_facts_and_figures = [] @@ -5065,11 +5080,12 @@ def app(): # Add in: priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", - "HA27", "HA28", "HA30", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" + "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63", + "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], - # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE] + # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE] # # Consider for ECO4: # Consider for GBIS: From 6a327629bf0ab5284b1b951cc98360597f30ce1f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 12 Mar 2024 11:09:09 +0000 Subject: [PATCH 134/248] rough attempt to attribute surplus ciga dependent eco4 jobs --- .../ha_15_32/ha_analysis_batch_3.py | 144 +++++++++++++----- 1 file changed, 107 insertions(+), 37 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index d556450b..5ad1aa27 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -176,6 +176,10 @@ class DataLoader: "address": "Full Address", "postcode": "Postcode" }, + "HA49": { + "address": "Property Address Full", + "postcode": "Property Postcode" + }, "HA54": { "address": "Postal Address", "postcode": "matching_postcode" @@ -219,7 +223,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]: + if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -382,6 +386,16 @@ class DataLoader: asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \ asset_list["PostCode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip() + elif ha_name == "HAXX": + asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["PostCode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip() + elif ha_name == "HAXXX": + asset_list["matching_address"] = ( + asset_list["Combined Address"].astype(str).str.lower().str.strip() + ", " + + asset_list["Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() else: raise NotImplementedError("implement me") @@ -467,6 +481,8 @@ class DataLoader: asset_list["HouseNo"] = asset_list["House_Number"].copy() elif ha_name == "HA9": asset_list["HouseNo"] = asset_list["House Number"].copy() + elif ha_name == "HAXXX": + asset_list["HouseNo"] = asset_list["Door Number"].copy() else: split_addresses = asset_list['matching_address'].str.split(',', expand=True) house_numbers = split_addresses[0].str.split(' ', expand=True) @@ -1999,6 +2015,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha49_survey_list(survey_list): + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -5080,8 +5100,11 @@ def app(): # Add in: priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", - "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63", - "HA107", "HA117" + "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", + "HA63", "HA107", "HA117", + + # New HAS + "HAXX", "HAXXX", ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], @@ -5100,39 +5123,86 @@ def app(): forecast_remaining_sales(loader) - # We load in the additional data required to perform the analysis - # cleaned = read_from_s3( - # s3_file_name="cleaned_epc_data/cleaned.bson", - # bucket_name="retrofit-data-dev" - # ) - # cleaned = msgpack.unpackb(cleaned, raw=False) - # cleaned = patch_cleaned(cleaned) - # - # cleaning_data = read_dataframe_from_s3_parquet( - # bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", - # ) - # created_at = datetime.now().isoformat() - # - # photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") - # - # outputs = get_epc_data( - # loader=loader, - # cleaned=cleaned, - # cleaning_data=cleaning_data, - # created_at=created_at, - # photo_supply_lookup=photo_supply_lookup, - # floor_area_decile_thresholds=floor_area_decile_thresholds, - # pull_data=pull_data - # ) + conversion_rate = 0.95 + archetype_check_conversion = 0.7 + res = [] + for k, v in loader.data.items(): + asset_list = v["asset_list"].copy() + agg = asset_list["ECO Eligibility"].value_counts() + # We find a case where there are properties that have passed CIGA + if not any("passed" in x for x in agg.index): + continue - # import pickle - # with open("ha_analysis.pickle", "wb") as f: - # pickle.dump({"outputs": outputs, "loader": loader}, f) + agg = pd.DataFrame(agg).reset_index() - # To read: - # import pickle - # with open("ha_analysis.pickle", "rb") as f: - # outputs = pickle.load(f)["outputs"] - # - # with open("loader.pickle", "rb") as f: - # loader = pickle.load(f) + passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"] + passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0 + + failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"] + failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0 + + ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1 + + dormant_ciga = agg[ + agg["ECO Eligibility"].str.contains("subject to ciga") & + ~agg["ECO Eligibility"].str.contains("subject to archetype") + ] + + dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0 + + dormant_ciga_archetype = agg[ + agg["ECO Eligibility"].str.contains("subject to ciga") & + agg["ECO Eligibility"].str.contains("subject to archetype") + ] + + dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0 + + needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion + needing_check = np.round(needing_check) + + additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + ( + dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate + ) + additional_jobs = np.round(additional_jobs) + + # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs + original_estimate = loader.december_figures[ + loader.december_figures["HA Name"] == k + ] + + original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0 + base_eco_figures = agg[ + agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"]) + ]["count"].sum() + eco4_from_ciga = original_estimate - base_eco_figures + eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0 + surplus_from_dormant = additional_jobs - eco4_from_ciga + surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant + + res.append( + { + "ha_name": k, + "additional_eco4": additional_jobs, + "needing_check": needing_check, + "surplus_from_dormant": surplus_from_dormant + } + ) + + res = pd.DataFrame(res) + # Drop the HAs that are not in that pervious draft + # In the v2 draft, there are 12 HAs + + v5_surplus = res[ + ~res["ha_name"].isin(["HA9"]) + ]["additional_eco4"].sum() + # 7212 properties + # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November + # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255, + # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties + # pre-CIGA + + v5_surplus_from_dormant = res[ + ~res["ha_name"].isin(["HA9"]) + ]["surplus_from_dormant"].sum() + # 5539.0 + # 9471690 From ddb5de50e550190c74cd5a2be767f2960352143a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 14 Mar 2024 13:58:29 +0000 Subject: [PATCH 135/248] testing with another stupid effing method --- .idea/.gitignore | 2 + .../ha_15_32/ha_analysis_batch_3.py | 230 +++++++++++++++++- .../epc_attributes/RoofAttributes.py | 17 +- 3 files changed, 241 insertions(+), 8 deletions(-) diff --git a/.idea/.gitignore b/.idea/.gitignore index 26d33521..8f00030d 100644 --- a/.idea/.gitignore +++ b/.idea/.gitignore @@ -1,3 +1,5 @@ # Default ignored files /shelf/ /workspace.xml +# GitHub Copilot persisted chat sessions +/copilot/chatSessions diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 5ad1aa27..767e13c8 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -7,7 +7,9 @@ import msgpack from datetime import datetime import pandas as pd import numpy as np -from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3 +from utils.s3 import ( + read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3, save_dataframe_to_s3_parquet +) from utils.logger import setup_logger from dotenv import load_dotenv from tqdm import tqdm @@ -2860,8 +2862,8 @@ def get_property_type_and_built_form(property_meta, ha_name): property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] built_form = property_meta["built_form"] elif ha_name == "HA7": - property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Archetype"]] - built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"][property_meta["Property Type"]] + property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"]) + built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"]) elif ha_name == "HA14": if property_meta["Asset Type Description"] == "Block - Repair": # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address @@ -4429,6 +4431,12 @@ def forecast_remaining_sales(loader): for ha_name, input_data in loader.data.items(): # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] + if original_warmfront_estimates.empty: + # Append an empty row + original_warmfront_estimates = december_figures.head(1).copy() + for k in original_warmfront_estimates.columns: + original_warmfront_estimates[k] = 0 + original_warmfront_estimates["HA Name"] = ha_name original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0] original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0] @@ -4742,6 +4750,12 @@ def forecast_remaining_sales(loader): if gbis_variance_2 != 0: raise ValueError("Something went wrong in gbis_variance2") + # Update the GBIS sold, since Warmfront often sold more GBIS that expected + original_warmfront_gbis_revenue = original_warmfront_sold_gbis + original_warmfront_remaining_gbis_revenue + original_warmfront_gbis = ( + original_warmfront_sold_gbis / gbis_rate + original_warmfront_remaining_gbis_revenue / gbis_rate + ) + to_append = { ("", "", "", "HA Name"): ha_name, # ECO4 - original warmfront figures @@ -5077,6 +5091,216 @@ def forecast_remaining_sales(loader): results.to_csv(file, header=True, index=False) +def fml_data_pull(loader): + has_bruh = ["HA7"] + from backend.SearchEpc import SearchEpc + epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" + + for ha in has_bruh: + asset_list = loader.data[ha]["asset_list"].copy() + # properties found as eligibile + fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"] + + # For each property, search for the latest EPC + epc_data = [] + for _, row in tqdm(fml.iterrows(), total=fml.shape[0]): + property_type, built_form = get_property_type_and_built_form(property_meta=row, ha_name=ha) + searcher = SearchEpc( + address1=row["HouseNo"], + postcode=row["matching_postcode"], + auth_token=epc_api_key, + os_api_key="", + property_type=property_type, + full_address=row["matching_address"], + ) + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_row_id": row["asset_list_row_id"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + # Remove None entries + epc_data = [x for x in epc_data if x is not None] + # Save the data in S3 as a parquet + epc_data_df = pd.DataFrame(epc_data) + save_pickle_to_s3( + data=epc_data_df, + bucket_name="retrofit-datalake-dev", + s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle" + ) + + +def extract_lower_bound(age_band): + if pd.isna(age_band): + return 1930 + try: + return int(age_band.split(':')[1].split('-')[0].strip()) + except (ValueError, IndexError): + return 1930 + + +def fml_analysis(loader): + from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes + from etl.epc.DataProcessor import EPCDataProcessor + assumed_ciga_pass_rate = 0.731 + has_bruh = ["HA7"] + + results = [] + for ha_name in has_bruh: + + original_figures = loader.december_figures[ + loader.december_figures["HA Name"] == ha_name + ].copy() + original_remaining = original_figures["ECO4 remaining"].values[0] + + # Read in the epc data + asset_list = loader.data[ha_name]["asset_list"].copy() + # properties found as eligibile + fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"] + epc_data = read_pickle_from_s3( + bucket_name="retrofit-datalake-dev", + s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle" + ) + + fuck_this = fml.merge( + epc_data, how="left", on="asset_list_row_id" + ) + if fuck_this.shape[0] != fml.shape[0]: + raise Exception("What the fuck bruv") + + # Take just remaining + if not loader.data[ha_name]["survey_list"].empty: + raise NotImplementedError("TAKE JUST REMAINING IDIOT") + + insulation_thicknesses = [] + for _, x in fuck_this.iterrows(): + if pd.isnull(x["roof-description"]): + continue + thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"] + # If there is a + in the thickness, strip it out + thickness = str(thickness).replace("+", "") + insulation_thicknesses.append( + {'uprn': x["uprn"], "roof_insulation_thickness": thickness} + ) + insulation_thicknesses = pd.DataFrame(insulation_thicknesses) + + fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn") + # clean roof insulation + fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0") + fuck_this["roof_insulation_thickness"] = fuck_this[ + "roof_insulation_thickness" + ].str.replace("below average", "50") + fuck_this["roof_insulation_thickness"] = fuck_this[ + "roof_insulation_thickness" + ].str.replace("None", "0") + fuck_this["roof_insulation_thickness"] = fuck_this[ + "roof_insulation_thickness" + ].str.replace("none", "0") + fuck_this["roof_insulation_thickness"] = fuck_this[ + "roof_insulation_thickness" + ].str.replace("average", "150") + + fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply( + lambda x: EPCDataProcessor.clean_construction_age_band(x) + ) + + fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) + + had_survey = fuck_this[pd.isnull(fuck_this["estimated"])] + + # proportion with a survey: + proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0] + + # Let's look just at the ECO4 business + # For things that had a survey, take the properties that didn't need a CIGA check + no_ciga_check_needed = had_survey[ + had_survey["ECO Eligibility"] == "eco4" + ] + + no_ciga_check_needed_with_archetype = no_ciga_check_needed[ + (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & + (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) & + (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) + ] + if not no_ciga_check_needed_with_archetype.empty: + raise Exception("SORT ME OUT") + + # Characterise no CIGA check needed + + # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction + + ciga_check_needed = had_survey[ + had_survey["ECO Eligibility"].str.contains("subject to ciga") + ] + + # We take just the cavity walls + # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/ + # This paper is based on London properties + # The proportion of EPCs with building characteristics errors are shown to + # differ between variables; floor and wall type errors occur in ~10-15% of EPCs, + # compared with ~5% for wall insulation and glazing performance + + ciga_check_needed_with_archetype = ciga_check_needed[ + (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & + (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) & + (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) + ] + + # We take properties that could feasibly be within install regions + ciga_check_needed_plausible = ciga_check_needed_with_archetype[ + ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270 + ] + + if not loader.data[ha_name]["ciga_list"].empty: + raise NotImplementedError("SORT OUT THE CIGA BRUV") + else: + ha_ciga_pass_rate = assumed_ciga_pass_rate + + ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate) + without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0] + + # Need to add on the non-ciga + total_expectation = ciga_check_expectation + without_ciga_expectation + + if proportion_with_survey < 100: + # We estimate the rest + without_survey_needing_ciga = fuck_this[ + (pd.isnull(fuck_this["estimated"]) == False) & + (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True) + ] + + # We apply the same conversion rate as the properties with a survey + without_survey_without_ciga_expected = np.round( + without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) + ) + + total_expectation += without_survey_without_ciga_expected + + without_survey_without_ciga = fuck_this[ + (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"])) + ] + + if not without_survey_without_ciga.empty: + raise Exception("Estimate the rest!!") + + results.append( + { + "HA Name": ha_name, + "Original ECO4 Estimate - Remaining": original_remaining, + "Proportion with a survey": proportion_with_survey, + "total_expectation": total_expectation + } + ) + + def app(): """ This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index 9d3b46b4..76f99f09 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -122,6 +122,13 @@ class RoofAttributes(Definitions): result["is_valid"] = "invalid" not in description description = description.replace("invalid", "") + # We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm + if result["is_pitched"] or result["is_loft"]: + # Search for a regular expression that matches 150 insulation + match = re.search(r"(\d+\+?)\s*insulation", description) + if match: + result['insulation_thickness'] = match.group(1) + # insulation thickness thickness_map = { "ceiling insulated": "average", @@ -137,11 +144,11 @@ class RoofAttributes(Definitions): # Remove the match from the description # description = description.replace(key, "") break - else: - # Extract insulation thickness in mm, if present - match = re.search(r'(\d+\+?)\s*mm', description) - if match: - result['insulation_thickness'] = match.group(1) + + # Extract insulation thickness in mm, if present + match = re.search(r'(\d+\+?)\s*mm', description) + if match: + result['insulation_thickness'] = match.group(1) if "insulation_thickness" not in result: result['insulation_thickness'] = None From bee07a253b8285a67c4cb78b9051e2b000de30c0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 14 Mar 2024 16:10:55 +0000 Subject: [PATCH 136/248] new method wip --- .../ha_15_32/ha_analysis_batch_3.py | 125 +++++++++++++++--- 1 file changed, 105 insertions(+), 20 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 767e13c8..9cadaf9f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -92,6 +92,27 @@ PROPERTY_TYPE_LOOKUP = { 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"}, }, + "HA25": { + 'Flat': 'Flat', + 'Mid Terrace House': 'House', + 'Semi Detached House': 'House', + 'End Terrace House': 'House', + 'House': 'House', + 'Semi Detached Bung': 'Bungalow', + 'Bungalow': 'Bungalow', + 'End Terrace Bungalow': 'Bungalow', + 'Maisonnette': 'Maisonette', + 'Mid Terrace Bungalow': 'Bungalow', + 'Bedspace': None, + 'Detached House': 'House', + 'Bedsit': 'Flat', + 'Coach House': 'House', + 'Detached Bungalow': 'Bungalow', + 'Office Buildings': None, + 'Guest Room': None, + 'Mid Terrace Housekeeping ': 'House', + 'End Terrace Housex': 'House' + }, "HA39": { "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, "1st floor flat": {"property_type": "Flat", "built_form": None}, @@ -2877,6 +2898,9 @@ def get_property_type_and_built_form(property_meta, ha_name): property_meta["Asset Type Description"] ] + built_form = None + elif ha_name == "HA25": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]] built_form = None elif ha_name == "HA16": config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] @@ -5092,7 +5116,8 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): - has_bruh = ["HA7"] + has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] + # DO from backend.SearchEpc import SearchEpc epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" @@ -5104,7 +5129,7 @@ def fml_data_pull(loader): # For each property, search for the latest EPC epc_data = [] for _, row in tqdm(fml.iterrows(), total=fml.shape[0]): - property_type, built_form = get_property_type_and_built_form(property_meta=row, ha_name=ha) + property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha) searcher = SearchEpc( address1=row["HouseNo"], postcode=row["matching_postcode"], @@ -5113,8 +5138,9 @@ def fml_data_pull(loader): property_type=property_type, full_address=row["matching_address"], ) - searcher.ordnance_survey_client.property_type = property_type - searcher.ordnance_survey_client.built_form = built_form + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None searcher.find_property(skip_os=True) if searcher.newest_epc is None: @@ -5147,11 +5173,32 @@ def extract_lower_bound(age_band): return 1930 +def classify_loft(x): + # high confidence + if float(x["roof_insulation_thickness"]) <= 100: + return "high" + + if float(x["roof_insulation_thickness"]) <= 200: + return "medium" + + if float(x["roof_insulation_thickness"]) <= 270 and x["epc_age"] >= 5 * 365: + return "medium" + + return "unlikely" + + def fml_analysis(loader): from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from etl.epc.DataProcessor import EPCDataProcessor + from datetime import datetime assumed_ciga_pass_rate = 0.731 - has_bruh = ["HA7"] + has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] + + no_ciga_cavity_descriptions = [ + "Cavity wall, as built, insulated (assumed)", + "Cavity wall, as built, no insulation (assumed)", + "Cavity wall, as built, partial insulation (assumed)" + ] results = [] for ha_name in has_bruh: @@ -5170,6 +5217,11 @@ def fml_analysis(loader): s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle" ) + # time from the inspection to now + epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days + if "estimated" not in epc_data.columns: + epc_data["estimated"] = None + fuck_this = fml.merge( epc_data, how="left", on="asset_list_row_id" ) @@ -5178,12 +5230,27 @@ def fml_analysis(loader): # Take just remaining if not loader.data[ha_name]["survey_list"].empty: - raise NotImplementedError("TAKE JUST REMAINING IDIOT") + survey_list = ( + loader.data[ha_name]["survey_list"][ + ~pd.isnull(loader.data[ha_name]["survey_list"]["asset_list_row_id"]) + ] + ) + fuck_this = fuck_this.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + # Anything that has an installation has gone to installation, and therefore is not remaining + fuck_this = fuck_this[pd.isnull(fuck_this["installation_status"])] + fuck_this = fuck_this.drop(columns=["installation_status"]) insulation_thicknesses = [] for _, x in fuck_this.iterrows(): if pd.isnull(x["roof-description"]): continue + if x["roof-description"] == "SAP05:Roof": + continue + thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"] # If there is a + in the thickness, strip it out thickness = str(thickness).replace("+", "") @@ -5208,11 +5275,13 @@ def fml_analysis(loader): "roof_insulation_thickness" ].str.replace("average", "150") - fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply( - lambda x: EPCDataProcessor.clean_construction_age_band(x) - ) + fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1) - fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) + # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply( + # lambda x: EPCDataProcessor.clean_construction_age_band(x) + # ) + # + # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) had_survey = fuck_this[pd.isnull(fuck_this["estimated"])] @@ -5225,9 +5294,23 @@ def fml_analysis(loader): had_survey["ECO Eligibility"] == "eco4" ] + # Walls: + # Cavity wall, as built, insulated (assumed) + # Cavity wall, as built, no insulation (assumed) + # Cavity wall, as built, partial insulation (assumed) + + # Roof: + # Less than 100mm = high confidence + # Less than 270mm & EPC at least 5 years old = medium confidence + # Otherwise, low confidence + + # SAP criteria is EPC C or below + + # Pre is 54 or below + no_ciga_check_needed_with_archetype = no_ciga_check_needed[ - (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & - (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) & + (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) & + (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] if not no_ciga_check_needed_with_archetype.empty: @@ -5239,7 +5322,14 @@ def fml_analysis(loader): ciga_check_needed = had_survey[ had_survey["ECO Eligibility"].str.contains("subject to ciga") - ] + ].copy() + + ciga_check_passed = had_survey[ + had_survey["ECO Eligibility"] == "eco4 - passed ciga" + ] + + if not ciga_check_passed.empty: + raise Exception("SORT ME BRUV") # We take just the cavity walls # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/ @@ -5248,17 +5338,12 @@ def fml_analysis(loader): # differ between variables; floor and wall type errors occur in ~10-15% of EPCs, # compared with ~5% for wall insulation and glazing performance - ciga_check_needed_with_archetype = ciga_check_needed[ + ciga_check_needed_plausible = ciga_check_needed[ (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & - (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) & + (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] - # We take properties that could feasibly be within install regions - ciga_check_needed_plausible = ciga_check_needed_with_archetype[ - ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270 - ] - if not loader.data[ha_name]["ciga_list"].empty: raise NotImplementedError("SORT OUT THE CIGA BRUV") else: From 9b255029b3f58d9f8653aaf1bbbd0cc43b024803 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 14 Mar 2024 17:36:09 +0000 Subject: [PATCH 137/248] fml fml --- .../ha_15_32/ha_analysis_batch_3.py | 141 ++++++++++++------ 1 file changed, 96 insertions(+), 45 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 9cadaf9f..e1d7db4d 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -20,6 +20,9 @@ from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply from recommendations.recommendation_utils import calculate_cavity_age from etl.epc.Record import EPCRecord +from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes +from etl.epc.DataProcessor import EPCDataProcessor +from datetime import datetime EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -5188,9 +5191,6 @@ def classify_loft(x): def fml_analysis(loader): - from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes - from etl.epc.DataProcessor import EPCDataProcessor - from datetime import datetime assumed_ciga_pass_rate = 0.731 has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] @@ -5216,15 +5216,20 @@ def fml_analysis(loader): bucket_name="retrofit-datalake-dev", s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle" ) + # We make sure we don't have duplicated. We do a super basic drop duplicates because it shouldn't be a huge + # issue at this point + epc_data = epc_data.drop_duplicates("uprn") # time from the inspection to now epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days if "estimated" not in epc_data.columns: - epc_data["estimated"] = None + # For all after HA7, we don't use estimated surveys + epc_data["estimated"] = False fuck_this = fml.merge( epc_data, how="left", on="asset_list_row_id" ) + fuck_this["estimated"] = fuck_this["estimated"].fillna(True) if fuck_this.shape[0] != fml.shape[0]: raise Exception("What the fuck bruv") @@ -5259,7 +5264,15 @@ def fml_analysis(loader): ) insulation_thicknesses = pd.DataFrame(insulation_thicknesses) + before_merge_shape = fuck_this.shape[0] fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn") + + if fuck_this.shape[0] != before_merge_shape: + raise Exception("SOMETHING WENT WRONG") + + if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")): + blah + # clean roof insulation fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0") fuck_this["roof_insulation_thickness"] = fuck_this[ @@ -5283,7 +5296,7 @@ def fml_analysis(loader): # # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) - had_survey = fuck_this[pd.isnull(fuck_this["estimated"])] + had_survey = fuck_this[fuck_this["estimated"] == False] # proportion with a survey: proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0] @@ -5294,27 +5307,11 @@ def fml_analysis(loader): had_survey["ECO Eligibility"] == "eco4" ] - # Walls: - # Cavity wall, as built, insulated (assumed) - # Cavity wall, as built, no insulation (assumed) - # Cavity wall, as built, partial insulation (assumed) - - # Roof: - # Less than 100mm = high confidence - # Less than 270mm & EPC at least 5 years old = medium confidence - # Otherwise, low confidence - - # SAP criteria is EPC C or below - - # Pre is 54 or below - - no_ciga_check_needed_with_archetype = no_ciga_check_needed[ + no_ciga_check_needed_eligible = no_ciga_check_needed[ (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) & (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] - if not no_ciga_check_needed_with_archetype.empty: - raise Exception("SORT ME OUT") # Characterise no CIGA check needed @@ -5327,9 +5324,20 @@ def fml_analysis(loader): ciga_check_passed = had_survey[ had_survey["ECO Eligibility"] == "eco4 - passed ciga" ] + # These should be treated the same as one that have passed their ciga checks, from a detection perspective + ciga_check_passed_eligible = ciga_check_passed[ + (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) & + (ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) & + (ciga_check_passed["current-energy-efficiency"].astype(float) <= 80) + ] - if not ciga_check_passed.empty: - raise Exception("SORT ME BRUV") + if not loader.data[ha_name]["ciga_list"].empty: + + proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True) + ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0] + + else: + ha_ciga_pass_rate = assumed_ciga_pass_rate # We take just the cavity walls # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/ @@ -5338,53 +5346,96 @@ def fml_analysis(loader): # differ between variables; floor and wall type errors occur in ~10-15% of EPCs, # compared with ~5% for wall insulation and glazing performance - ciga_check_needed_plausible = ciga_check_needed[ + ciga_check_needed_eligible = ciga_check_needed[ (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] - if not loader.data[ha_name]["ciga_list"].empty: - raise NotImplementedError("SORT OUT THE CIGA BRUV") - else: - ha_ciga_pass_rate = assumed_ciga_pass_rate - - ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate) - without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0] + ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate) + without_ciga_expectation = no_ciga_check_needed_eligible.shape[0] + passed_ciga_expectation = ciga_check_passed_eligible.shape[0] # Need to add on the non-ciga - total_expectation = ciga_check_expectation + without_ciga_expectation + total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation if proportion_with_survey < 100: # We estimate the rest without_survey_needing_ciga = fuck_this[ - (pd.isnull(fuck_this["estimated"]) == False) & + (fuck_this["estimated"] == True) & (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True) ] - # We apply the same conversion rate as the properties with a survey - without_survey_without_ciga_expected = np.round( - without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) - ) + if without_survey_needing_ciga.empty: + without_survey_without_ciga_expected = 0 + else: + # We apply the same conversion rate as the properties with a survey + without_survey_without_ciga_expected = np.round( + without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) + ) - total_expectation += without_survey_without_ciga_expected - - without_survey_without_ciga = fuck_this[ - (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"])) + without_survey_passed_ciga = fuck_this[ + (fuck_this["estimated"] == True) & + (fuck_this["ECO Eligibility"] == "eco4 - passed ciga") ] - if not without_survey_without_ciga.empty: - raise Exception("Estimate the rest!!") + if without_survey_passed_ciga.empty: + without_survey_passed_ciga_expected = 0 + else: + # We apply the same conversion rate as the properties with a survey + without_survey_passed_ciga_expected = np.round( + without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0]) + ) + + # Finally, no ciga needed + without_survey_eco4 = fuck_this[ + (fuck_this["estimated"] == True) & + (fuck_this["ECO Eligibility"] == "eco4") + ] + + if without_survey_eco4.empty: + without_survey_eco4_expected = 0 + else: + # We apply the same conversion rate as the properties with a survey + without_survey_eco4_expected = np.round( + without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0]) + ) + + total_expectation = ( + total_expectation + + without_survey_without_ciga_expected + + without_survey_passed_ciga_expected + + without_survey_eco4_expected + ) + + surveys = loader.data[ha_name]["survey_list"] + sold_now = 0 + if not surveys.empty: + sold_now = surveys[ + surveys["installation_status"].str.lower().str.contains("eco4") + ].shape[0] + + sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0] results.append( { "HA Name": ha_name, "Original ECO4 Estimate - Remaining": original_remaining, + "Of which sold": sales_since_nov, + "Of which ECO4 Eligible - Remaining": int(total_expectation), "Proportion with a survey": proportion_with_survey, - "total_expectation": total_expectation } ) + results_df = pd.DataFrame(results) + + results_df["Delta vs November"] = 100 * ( + results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"] + ) / results_df["Original ECO4 Estimate - Remaining"] + + # TODO: Split into high and low confidence? + # + def app(): """ From 3b65a71793721d65fd8356c215813a13d384bc4d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 14 Mar 2024 18:25:50 +0000 Subject: [PATCH 138/248] added in extra shit to output --- .../ha_15_32/ha_analysis_batch_3.py | 47 ++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e1d7db4d..53ce69e2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -5200,6 +5200,22 @@ def fml_analysis(loader): "Cavity wall, as built, partial insulation (assumed)" ] + codes = [ + "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7", + "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", + "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX" + ] + + values = [ + 706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135, + 1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204, + 281, 422, 74, 313, 71, 6 + ] + + # Create a dictionary mapping + remaining_eligible_mapping = dict(zip(codes, values)) + results = [] for ha_name in has_bruh: @@ -5207,6 +5223,7 @@ def fml_analysis(loader): loader.december_figures["HA Name"] == ha_name ].copy() original_remaining = original_figures["ECO4 remaining"].values[0] + postcode_list_remaining = remaining_eligible_mapping[ha_name] # Read in the epc data asset_list = loader.data[ha_name]["asset_list"].copy() @@ -5271,7 +5288,7 @@ def fml_analysis(loader): raise Exception("SOMETHING WENT WRONG") if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")): - blah + raise Exception("DO THE DAMN ARCHETYPE CHECK BRO") # clean roof insulation fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0") @@ -5313,6 +5330,13 @@ def fml_analysis(loader): (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] + # For anything not needing a CIGA check, some of it will be GBIS + no_ciga_check_needed_eligible_gbis = no_ciga_check_needed[ + (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) & + (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) & + (~no_ciga_check_needed["asset_list_row_id"].isin(no_ciga_check_needed_eligible["asset_list_row_id"].values)) + ] + # Characterise no CIGA check needed # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction @@ -5359,6 +5383,8 @@ def fml_analysis(loader): # Need to add on the non-ciga total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation + total_gbis_expectation = no_ciga_check_needed_eligible_gbis.shape[0] + if proportion_with_survey < 100: # We estimate the rest without_survey_needing_ciga = fuck_this[ @@ -5395,12 +5421,17 @@ def fml_analysis(loader): if without_survey_eco4.empty: without_survey_eco4_expected = 0 + without_survey_gbis_expected = 0 else: # We apply the same conversion rate as the properties with a survey without_survey_eco4_expected = np.round( without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0]) ) + without_survey_gbis_expected = np.round( + without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0]) + ) + total_expectation = ( total_expectation + without_survey_without_ciga_expected + @@ -5408,6 +5439,8 @@ def fml_analysis(loader): without_survey_eco4_expected ) + total_gbis_expectation = total_gbis_expectation + without_survey_gbis_expected + surveys = loader.data[ha_name]["survey_list"] sold_now = 0 if not surveys.empty: @@ -5421,20 +5454,22 @@ def fml_analysis(loader): { "HA Name": ha_name, "Original ECO4 Estimate - Remaining": original_remaining, + "Postcode List - Remaining": postcode_list_remaining, "Of which sold": sales_since_nov, "Of which ECO4 Eligible - Remaining": int(total_expectation), + "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation), "Proportion with a survey": proportion_with_survey, } ) results_df = pd.DataFrame(results) - results_df["Delta vs November"] = 100 * ( - results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"] - ) / results_df["Original ECO4 Estimate - Remaining"] + # results_df["Delta vs November"] = 100 * ( + # results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"] + # ) / results_df["Original ECO4 Estimate - Remaining"] - # TODO: Split into high and low confidence? - # + # TODO: Add in estimated GBIS (for eco jobs, of which look like gbis) + # TODO: Change the left hand side number for our post CIGA estimates def app(): From 479a2b08c33e2911a5ae98c3d315903af04e4980 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 14 Mar 2024 19:02:33 +0000 Subject: [PATCH 139/248] ffs --- .../ha_15_32/ha_analysis_batch_3.py | 22 +++++++++++++++++-- etl/epc_clean/app.py | 3 +++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 53ce69e2..9462642f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -5119,7 +5119,9 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): - has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] + has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", + # Do these + "HA1", "HA13", "HA50", "HA24"] # DO from backend.SearchEpc import SearchEpc epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" @@ -5197,9 +5199,19 @@ def fml_analysis(loader): no_ciga_cavity_descriptions = [ "Cavity wall, as built, insulated (assumed)", "Cavity wall, as built, no insulation (assumed)", - "Cavity wall, as built, partial insulation (assumed)" + "Cavity wall, as built, partial insulation (assumed)", + "Cavity wall, no insulation (assumed)", + "Cavity wall, partial insulation (assumed)", + "Cavity wall,", + "Cavity wall, insulated (assumed)", + "Cavity wall, no insulation (assumed)", + "Cavity wall, as built, insulated (assumed)", + "Cavity wall, partial insulation (assumed)", ] + # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass + # them! + codes = [ "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7", "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12", @@ -5217,6 +5229,7 @@ def fml_analysis(loader): remaining_eligible_mapping = dict(zip(codes, values)) results = [] + wall_descriptions = [] for ha_name in has_bruh: original_figures = loader.december_figures[ @@ -5236,6 +5249,7 @@ def fml_analysis(loader): # We make sure we don't have duplicated. We do a super basic drop duplicates because it shouldn't be a huge # issue at this point epc_data = epc_data.drop_duplicates("uprn") + wall_descriptions.extend(epc_data["walls-description"].unique().tolist()) # time from the inspection to now epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days @@ -5464,6 +5478,10 @@ def fml_analysis(loader): results_df = pd.DataFrame(results) + wall_descriptions = list(set(wall_descriptions)) + from pprint import pprint + pprint(wall_descriptions) + # results_df["Delta vs November"] = 100 * ( # results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"] # ) / results_df["Original ECO4 Estimate - Remaining"] diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 53c1a329..3f1a1a80 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -36,8 +36,11 @@ def app(): cleaned_data = {} epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] + WALLS = [] for directory in tqdm(epc_directories): data = pd.read_csv(directory / "certificates.csv", low_memory=False) + z = data["WALLS_DESCRIPTION"].unique().tolist() + WALLS.extend(z) # Rename the columns to the same format as the api returns data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold From cc319ab91149f77dd04e691e6bc6b99bb9d39702 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 15 Mar 2024 10:09:26 +0000 Subject: [PATCH 140/248] new ha analysis wip --- .../ha_15_32/ha_analysis_batch_3.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 9462642f..a0b7e0bb 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -5210,7 +5210,7 @@ def fml_analysis(loader): ] # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass - # them! + # them! Non-invasices will have checked the wall though codes = [ "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7", @@ -5352,16 +5352,11 @@ def fml_analysis(loader): ] # Characterise no CIGA check needed - - # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction - ciga_check_needed = had_survey[ had_survey["ECO Eligibility"].str.contains("subject to ciga") ].copy() - ciga_check_passed = had_survey[ - had_survey["ECO Eligibility"] == "eco4 - passed ciga" - ] + ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"] # These should be treated the same as one that have passed their ciga checks, from a detection perspective ciga_check_passed_eligible = ciga_check_passed[ (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) & @@ -5469,18 +5464,15 @@ def fml_analysis(loader): "HA Name": ha_name, "Original ECO4 Estimate - Remaining": original_remaining, "Postcode List - Remaining": postcode_list_remaining, - "Of which sold": sales_since_nov, + # "Of which sold": sales_since_nov, "Of which ECO4 Eligible - Remaining": int(total_expectation), "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation), - "Proportion with a survey": proportion_with_survey, + # "Proportion with a survey": proportion_with_survey, } ) results_df = pd.DataFrame(results) - - wall_descriptions = list(set(wall_descriptions)) - from pprint import pprint - pprint(wall_descriptions) + results_df.to_csv("analysis - revised.csv") # results_df["Delta vs November"] = 100 * ( # results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"] From 12f780a08989e896235adf96e175d39240c3adbb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 15 Mar 2024 16:54:48 +0000 Subject: [PATCH 141/248] setting up complete data pull --- .../ha_15_32/ha_analysis_batch_3.py | 380 +++++++++++++++++- 1 file changed, 369 insertions(+), 11 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a0b7e0bb..902d48fd 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -42,6 +42,15 @@ PROPERTY_TYPE_LOOKUP = { 'Detached Local Connect': 'Detached', } }, + "HA2": { + 'HOUSE': 'House', + 'FLAT': 'Flat', + 'SHELTERED': None, + 'BUNGALOW': 'Bungalow', + 'BED-SIT': None, + 'MAISONETTE': "Maisonette", + 'HOSTEL': None + }, "HA6": { "property_type": { 'HOUSE': "House", @@ -69,6 +78,23 @@ PROPERTY_TYPE_LOOKUP = { "End Terraced": "End-Terrace", } }, + "HA12": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Bedsit": None, + }, + "HA13": { + 'House': "House", + 'Flat': "Flat", + 'House MT': "House", + 'House SD': "House", + 'House ET': "House", + 'Bungalow MT': "Bungalow", + 'Bungalow ET': "Bungalow", + 'ii': None, + }, "HA14": { "property_type": { "House": "House", @@ -77,6 +103,13 @@ PROPERTY_TYPE_LOOKUP = { "Maisonette": "Maisonette", } }, + "HA15": { + 'House': 'House', + 'Flat': 'Flat', + 'Bungalow': 'Bungalow', + 'Maisonette': 'Maisonette', + 'Flat over garage': 'Flat', + }, "HA16": { 'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"}, 'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"}, @@ -95,6 +128,30 @@ PROPERTY_TYPE_LOOKUP = { 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"}, }, + "HA18": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Bedsit": None, + "Shop": None, + "Hostel": None, + "Block": None, + }, + "HA24": { + '01 HOUSE': 'House', + '02 FLAT': 'Flat', + '03 BUNGALOW': 'Bungalow', + '10 PBUNGALOW': 'Bungalow', + '01 HOUSE MID': 'House', + '13 SBUNGALOW': 'Bungalow', + '12 SBEDSIT': None, # BEDSIT does not match the specified property types + '14 SFLAT': 'Flat', + '05 BEDSIT': None, + '04 MAISONETTE': 'Maisonette', + '11 PFLAT': 'Flat', + '09 PBEDSIT': None + }, "HA25": { 'Flat': 'Flat', 'Mid Terrace House': 'House', @@ -116,6 +173,77 @@ PROPERTY_TYPE_LOOKUP = { 'Mid Terrace Housekeeping ': 'House', 'End Terrace Housex': 'House' }, + "HA28": { + 'Flat': 'Flat', + 'Semi detached house': 'House', + 'Terraced house': 'House', + 'Maisonette flat': 'Maisonette', + 'Sheltered bedsit': None, + 'APD flat': 'Flat', + 'Bungalow terraced': 'Bungalow', + 'Flat with partition': 'Flat', + 'Bungalow semi detached': 'Bungalow', + 'APD Bungalow': 'Bungalow', + 'Sheltered flat': 'Flat', + 'Bedsit Flat': 'Flat', + 'Bedsit bungalow semi detached': 'Bungalow', + 'Sheltered bungalow terraced': 'Bungalow', + 'Sheltered bedsit disabled': None, + 'Bedsit bungalow terraced': 'Bungalow', + 'Sheltered bungalow semi detached': 'Bungalow', + 'Sheltered warden flat': 'Flat', + 'Bungalow detached': 'Bungalow', + 'Block': None, # Does not match the specified property types + 'End Terraced House': 'House', + 'Mid Terraced House': 'House', + '#N/A': None, # Assuming this is an invalid or missing entry + 0: None # Assuming 0 is also an invalid or missing entry + }, + "HA30": { + 'House': 'House', + 'Flat': 'Flat', + 'Bungalow': 'Bungalow', + 'House with Attached Garage': 'House', + 'Bed Space': None, # Assuming this does not fit the specified property types + 'House with Garage': 'House', + 'Bungalow with Wheelchair Access': 'Bungalow', + 'Maisonette': 'Maisonette', + 'Flat with Wheelchair Access': 'Flat', + 'Bedsit': None, # Assuming this does not fit the specified property types + 'Flat w Wheelchair Access & Car Park': 'Flat', + 'House with Wheelchair Access': 'House', + 'Bungalow w Wheelchair Access & Car ': 'Bungalow' + }, + "HA32": { + 'Bungalow': 'Bungalow', + 'Flat': 'Flat', + 'Bungalow Disabled': 'Bungalow', # "Disabled" properties categorized with their base type + 'House': 'House', + 'Dormer Bungalow': 'Bungalow', + 'Pop-In': None, # Does not fit the specified property types + 'Flat Disabled': 'Flat', + 'Laundry': None, # Does not fit the specified property types + 'Bedsit': None, # Excluded from the given categories + 'Shed': None, # Does not fit the specified property types + 'Store Room': None # Does not fit the specified property types + }, + "HA34": { + 'Flat': 'Flat', + 'House': 'House', + 'Bungalow': 'Bungalow', + 'Maisonette': 'Maisonette', + 'ND': None, + }, + "HA35": { + "Flat": "Flat", + "Maisonette": "Maisonette", + "House": "House", + "Bedsit": None, + "2 Bedroom Unknown": None, + "1 Bedroom Unknown": None, + "3 Bedroom Unknown": None, + "4 Bedroom Unknown": None, + }, "HA39": { "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, "1st floor flat": {"property_type": "Flat", "built_form": None}, @@ -140,6 +268,105 @@ PROPERTY_TYPE_LOOKUP = { "1st floor flat with study room": {"property_type": "Flat", "built_form": None}, "2nd floor flat with study": {"property_type": "Flat", "built_form": None}, }, + "HA41": { + 'Garage': None, + 'House 1919-1945': 'House', + 'House 1946-1964': 'House', + 'Flats & Maisonettes post 1974': 'Flat', + 'Non traditional houses': 'House', + 'Sheltered': None, + 'Flats & Maisonettes 1965-1974': 'Flat', + 'House post 1974': 'House', + 'Block': None, + 'Flats & Maisonettes 1946-1964': 'Flat', + 'House 1965-1974': 'House', + 'Non traditional flats': 'Flat', + 'Bungalow 1965-1974': 'Bungalow', + 'PIMSS EMPTY': None, + 'Bungalow post 1974': 'Bungalow', + 'Bungalow 1946-1964': 'Bungalow', + 'Flats & Maisonettes 1919-1945': 'Flat', + 'House pre 1919': 'House', + 'Flats & Maisonettes pre 1919': 'Flat', + 'Bungalow 1919-1945': 'Bungalow', + 'Office': None + }, + "HA48": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Unit": None + }, + "HA50": { + 'House': 'House', + 'Bungalow': 'Bungalow', + 'Flat': 'Flat', + 'House SD': 'House', + 'House MT': 'House', + 'House ET': 'House', + 'Bungalow ET': 'Bungalow', + 'House SD ': 'House', + 'House. SD': 'House', + 'Bungalow SD': 'Bungalow', + 'Bungalow MT': 'Bungalow', + 'Bungalow D': 'Bungalow', + 'House D': 'House', + 'House. MT': 'House', + 'House ': 'House', + 'House ET ': 'House', + ' ': None, + 'Flat?': 'Flat', + 'Bungalow ': 'Bungalow' + }, + "HA56": { + 'House Non Specific': 'House', + 'HOUSE TERRACED': 'House', + 'HOUSE - SEMI DETACHD': 'House', + 'Bungalow': 'Bungalow', + 'House - End Terraced': 'House', + 'Block': None, + 'Block with Communal': None, + 'Bungalow - Terraced': 'Bungalow', + 'Bungalow - Semi Dtch': 'Bungalow', + 'Block House with rooms': None, + 'Bungalow - End Terr': 'Bungalow', + 'House - Mid Terraced': 'House', + 'Bungalow - Detached': 'Bungalow', + 'House - Detached': 'House', + 'HOUSE THREE STOREY': 'House', + 'Maisonette': 'Maisonette', + 'Communal Block': None, + 'Scheme': None + }, + "HA63": { + 'Flat': 'Flat', + 'House - Semi detached': 'House', + 'House - Detached': 'House', + 'House - End Terrace': 'House', + 'House - Mid Terrace': 'House', + 'Bungalow - Semi detached': 'Bungalow', + 'Bungalow': 'Bungalow', + 'Bedsit': None, # Considering as a non-specific residential category here + 'Maisonette': 'Maisonette', + 'Bungalow - End Terrace': 'Bungalow', + 'Bungalow - Detached': 'Bungalow', + 'Maisonette - Mid Terrace': 'Maisonette', + 'Maisonette - End Terrace': 'Maisonette', + 'Studio Flat': 'Flat', + 'Maisonette - Detached': 'Maisonette', + 'Bungalow - Mid Terrace': 'Bungalow', + 'Bedsit - Mid Terrace': None, + 'Bedsit - End Terrace': None, + 'Amenity Block - Semi detached': None, # Assuming non-residential + 'Maisonette - Semi Detached': 'Maisonette', + 'Amenity Block - Detached': None, # Assuming non-residential + 'Hostel': None, # Typically not considered a standard residential property for this context + 'Bungalow - Attached': 'Bungalow', + 'Unknown': None, # Not enough information to categorize + 'Studio Flat - Mid Terrace': 'Flat', + 'Chalet - Wheelchair': None # Specialized type, not categorized here + }, "HA107": { "property_type": { "HOUSE": "House", @@ -160,6 +387,27 @@ PROPERTY_TYPE_LOOKUP = { "Detached": "Detached", "Detatched": "Detached", } + }, + "HA117": { + "Flat": "Flat", + "House": "House", + "Bungalow": "Bungalow", + "Flat over garage/underpass": "Flat", + }, + "HAXXX": { + 'mid terraced house': 'House', + 'semi detached house': 'House', + '1st fl 4 in a block': 'Flat', + 'G/F 4 in a block': 'Flat', + 'end terraced house': 'House', + '1st floor flat': 'Flat', + 'G/F floor flat': 'Flat', + 'semi detached bungalow': 'Bungalow', + '2nd floor flat': 'Flat', + 'mid terrace bungalow': 'Bungalow', + 'detached bungalow': 'Bungalow', + 'end terrace bungalow': 'Bungalow', + 'Staff accommodation': None # Marked as None due to its special nature } } @@ -2882,12 +3130,36 @@ def get_property_type_and_built_form(property_meta, ha_name): property_type = "Flat" built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None) + elif ha_name == "HA2": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip()) + built_form = None elif ha_name == "HA6": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] built_form = property_meta["built_form"] elif ha_name == "HA7": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"]) built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"]) + elif ha_name == "HA9": + property_description = property_meta["Asset Type"].strip().lower() + if "house" in property_description: + return "House", None + + if "flat" in property_description: + return "Flat", None + + if "bungalow" in property_description: + return "Bungalow", None + + if "maisonette" in property_description: + return "Maisonette", None + + return None, None + elif ha_name == "HA12": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip()) + built_form = None + elif ha_name == "HA13": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Type Cd"].strip()) + built_form = None elif ha_name == "HA14": if property_meta["Asset Type Description"] == "Block - Repair": # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address @@ -2902,15 +3174,60 @@ def get_property_type_and_built_form(property_meta, ha_name): ] built_form = None - elif ha_name == "HA25": - property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]] + elif ha_name == "HA15": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None elif ha_name == "HA16": config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] property_type = config.get("property-type") built_form = config.get("built-form") - elif ha_name == "HA39": + elif ha_name == "HA18": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None + elif ha_name == "HA19": + property_type = property_meta["Dwelling Type"] + built_form = None + elif ha_name == "HA24": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HA25": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]] + built_form = None + elif ha_name == "HA27": + property_type = property_meta["Property Type"] + built_form = None + elif ha_name == "HA28": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Property Type - Academy"]] + built_form = None + elif ha_name == "HA30": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["A_AssetType"]] + built_form = None + elif ha_name == "HA31": + property_description = property_meta["A_AssetType"].strip().lower() + if "house" in property_description: + return "House", None + if "flat" in property_description: + return "Flat", None + + if "bungalow" in property_description: + return "Bungalow", None + + if "maisonette" in property_description: + return "Maisonette", None + + return None, None + + elif ha_name == "HA32": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling type"].strip()) + built_form = None + elif ha_name == "HA34": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HA35": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip()) + built_form = None + elif ha_name == "HA39": property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {}) property_type = property_type_config.get("property_type", None) built_form = property_type_config.get("built_form", None) @@ -2921,11 +3238,35 @@ def get_property_type_and_built_form(property_meta, ha_name): property_type = "Flat" else: property_type = "House" + elif ha_name == "HA41": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip()) + built_form = None + elif ha_name == "HA48": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HA49": + property_type = property_meta["Property Class"].strip() + built_form = None + elif ha_name == "HA54": + property_type = property_meta["Property Type"] + built_form = None + elif ha_name == "HA56": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type Description"].strip()) + built_form = None + elif ha_name == "HA63": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PropertyType"].strip()) + built_form = None elif ha_name == "HA107": - property_type = property_meta.get("property_type", None) built_form = property_meta.get("built_form", None) - + elif ha_name == "HA117": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HAXX": + return property_meta["Property Type"].split(":")[0].strip(), None + elif ha_name == "HAXXX": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Unit Description"].strip()) + built_form = None else: raise NotImplementedError("Implement me") @@ -5119,9 +5460,16 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): - has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", - # Do these - "HA1", "HA13", "HA50", "HA24"] + has_bruh = [ + # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", + # Updated get_property_type_and_built_form, still needs running + "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", + # todo + ] + + # Can't pull from EPC database because it's based in Scotland + # "HAXXX", "HAXX" # DO from backend.SearchEpc import SearchEpc epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" @@ -5134,14 +5482,24 @@ def fml_data_pull(loader): # For each property, search for the latest EPC epc_data = [] for _, row in tqdm(fml.iterrows(), total=fml.shape[0]): + property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha) + + if ha == "HAXXX": + to_join = [str(x) for x in + [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"], + row["Postcode"]] if x is not None] + full_address = ", ".join(to_join) + else: + full_address = row["matching_address"] + searcher = SearchEpc( - address1=row["HouseNo"], + address1=str(row["HouseNo"]), postcode=row["matching_postcode"], auth_token=epc_api_key, os_api_key="", property_type=property_type, - full_address=row["matching_address"], + full_address=full_address, ) # Force the skipping of estimating the EPC searcher.ordnance_survey_client.property_type = None @@ -5194,7 +5552,7 @@ def classify_loft(x): def fml_analysis(loader): assumed_ciga_pass_rate = 0.731 - has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] + has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"] no_ciga_cavity_descriptions = [ "Cavity wall, as built, insulated (assumed)", From 6423ab2fac732a905645260263ebc72149424712 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 15 Mar 2024 17:53:18 +0000 Subject: [PATCH 142/248] data pull pipeline ready --- backend/SearchEpc.py | 11 +- .../ha_15_32/ha_analysis_batch_3.py | 100 ++++++++++-------- 2 files changed, 61 insertions(+), 50 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 3d2df9fb..cc2ee4a9 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -147,6 +147,7 @@ class SearchEpc: uprn: [int, None] = None, size=None, property_type=None, + fast=False ): """ Address lines 1 and postcode are mandatory fields. The other address lines are optional @@ -187,6 +188,7 @@ class SearchEpc: self.size = size if size is not None else 25 self.property_type = property_type + self.fast = fast @classmethod def get_house_number(cls, address: str) -> str | None: @@ -365,9 +367,6 @@ class SearchEpc: # Finally, we identify the newest epc and the rest, and then return newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows) - # Retrieve postcode and address - address_epc, postcode_epc = self.format_address(newest_epc=newest_epc) - # Ge the uprn from the newest record for this home uprns = {r["uprn"] for r in rows if r["uprn"]} # We can sometimes have no uprn for a property @@ -384,6 +383,12 @@ class SearchEpc: uprn = uprns.pop() if uprns else None + if self.fast: + return newest_epc, [], {}, "", "", None + + # Retrieve postcode and address + address_epc, postcode_epc = self.format_address(newest_epc=newest_epc) + return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn @staticmethod diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 902d48fd..7db97733 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -5461,9 +5461,9 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): has_bruh = [ - # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", + # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", # Updated get_property_type_and_built_form, still needs running - "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", # todo ] @@ -5474,57 +5474,63 @@ def fml_data_pull(loader): from backend.SearchEpc import SearchEpc epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" + failed_has = [] for ha in has_bruh: - asset_list = loader.data[ha]["asset_list"].copy() - # properties found as eligibile - fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"] + print(f"Pulling data for {ha}") + try: + asset_list = loader.data[ha]["asset_list"].copy() + # properties found as eligibile + fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"] - # For each property, search for the latest EPC - epc_data = [] - for _, row in tqdm(fml.iterrows(), total=fml.shape[0]): + # For each property, search for the latest EPC + epc_data = [] + for _, row in tqdm(fml.iterrows(), total=fml.shape[0]): - property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha) + property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha) - if ha == "HAXXX": - to_join = [str(x) for x in - [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"], - row["Postcode"]] if x is not None] - full_address = ", ".join(to_join) - else: - full_address = row["matching_address"] + if ha == "HAXXX": + to_join = [str(x) for x in + [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"], + row["Postcode"]] if x is not None] + full_address = ", ".join(to_join) + else: + full_address = row["matching_address"] - searcher = SearchEpc( - address1=str(row["HouseNo"]), - postcode=row["matching_postcode"], - auth_token=epc_api_key, - os_api_key="", - property_type=property_type, - full_address=full_address, + searcher = SearchEpc( + address1=str(row["HouseNo"]), + postcode=row["matching_postcode"], + auth_token=epc_api_key, + os_api_key="", + property_type=property_type, + full_address=full_address, + fast=True + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_row_id": row["asset_list_row_id"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + # Remove None entries + epc_data = [x for x in epc_data if x is not None] + # Save the data in S3 as a parquet + epc_data_df = pd.DataFrame(epc_data) + save_pickle_to_s3( + data=epc_data_df, + bucket_name="retrofit-datalake-dev", + s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle" ) - # Force the skipping of estimating the EPC - searcher.ordnance_survey_client.property_type = None - searcher.ordnance_survey_client.built_form = None - - searcher.find_property(skip_os=True) - if searcher.newest_epc is None: - continue - - epc = { - "asset_list_row_id": row["asset_list_row_id"], - **searcher.newest_epc.copy() - } - - epc_data.append(epc) - - # Remove None entries - epc_data = [x for x in epc_data if x is not None] - # Save the data in S3 as a parquet - epc_data_df = pd.DataFrame(epc_data) - save_pickle_to_s3( - data=epc_data_df, - bucket_name="retrofit-datalake-dev", - s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle" - ) + except Exception as e: + failed_has.append(ha) def extract_lower_bound(age_band): From 4e077053cd73b4e6cd27392440e4e179846f6f9a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 Mar 2024 14:51:39 +0000 Subject: [PATCH 143/248] Adding gbis to output --- .../ha_15_32/ha_analysis_batch_3.py | 92 +++++++++++++++---- 1 file changed, 74 insertions(+), 18 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 7db97733..0ca28927 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3247,6 +3247,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA49": property_type = property_meta["Property Class"].strip() built_form = None + elif ha_name == "HA50": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None elif ha_name == "HA54": property_type = property_meta["Property Type"] built_form = None @@ -5685,12 +5688,6 @@ def fml_analysis(loader): fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1) - # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply( - # lambda x: EPCDataProcessor.clean_construction_age_band(x) - # ) - # - # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) - had_survey = fuck_this[fuck_this["estimated"] == False] # proportion with a survey: @@ -5716,10 +5713,6 @@ def fml_analysis(loader): ] # Characterise no CIGA check needed - ciga_check_needed = had_survey[ - had_survey["ECO Eligibility"].str.contains("subject to ciga") - ].copy() - ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"] # These should be treated the same as one that have passed their ciga checks, from a detection perspective ciga_check_passed_eligible = ciga_check_passed[ @@ -5743,20 +5736,60 @@ def fml_analysis(loader): # differ between variables; floor and wall type errors occur in ~10-15% of EPCs, # compared with ~5% for wall insulation and glazing performance + ciga_check_needed = had_survey[ + had_survey["ECO Eligibility"].str.contains("subject to ciga") + ].copy() + ciga_check_needed_eligible = ciga_check_needed[ (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] + # Finally, characterise gbis properties. Some of the business might look like ECO4 work, whereas we then + # qualify what actually looks like gbis + gbis_identified = had_survey[ + had_survey["ECO Eligibility"] == "gbis" + ].copy() + + gbis_looks_like_eco4 = gbis_identified[ + (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) & + (gbis_identified["roof_classiciation"].isin(["high", "medium"])) & + (gbis_identified["current-energy-efficiency"].astype(float) <= 80) & + ( + ( + (gbis_identified["property-type"] == "House") & + (gbis_identified["built-form"] != "Mid-Terrace") + ) | ( + (gbis_identified["property-type"] == "Bungalow") & + (gbis_identified["built-form"].isin(["Detached"])) + ) + ) + ] + + gbis_qualified = gbis_identified[ + (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) & + (gbis_identified["current-energy-efficiency"].astype(float) <= 80) & + (~gbis_identified["asset_list_row_id"].isin(gbis_looks_like_eco4["asset_list_row_id"].values)) + ] + ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate) without_ciga_expectation = no_ciga_check_needed_eligible.shape[0] passed_ciga_expectation = ciga_check_passed_eligible.shape[0] + identified_as_gbis_looks_like_eco4 = gbis_looks_like_eco4.shape[0] # Need to add on the non-ciga - total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation + total_eco4_expectation = ( + ciga_check_expectation + + without_ciga_expectation + + passed_ciga_expectation + + identified_as_gbis_looks_like_eco4 + ) - total_gbis_expectation = no_ciga_check_needed_eligible_gbis.shape[0] + no_ciga_check_needed_actually_gbis = no_ciga_check_needed_eligible_gbis.shape[0] + gbis_qualified = gbis_qualified.shape[0] + + total_gbis_expectation = no_ciga_check_needed_actually_gbis + gbis_qualified if proportion_with_survey < 100: # We estimate the rest @@ -5805,14 +5838,38 @@ def fml_analysis(loader): without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0]) ) - total_expectation = ( - total_expectation + + # And gbis + without_survey_gbis = fuck_this[ + (fuck_this["estimated"] == True) & + (fuck_this["ECO Eligibility"] == "gbis") + ] + + if without_survey_gbis.empty: + without_survey_identified_as_gbis_qualified = 0 + without_survey_identified_as_gbis_eco4 = 0 + else: + # We apply the same conversion rate as the properties with a survey + without_survey_identified_as_gbis_qualified = np.round( + without_survey_gbis.shape[0] * (gbis_qualified / gbis_identified.shape[0]) + ) + + without_survey_identified_as_gbis_eco4 = np.round( + without_survey_eco4.shape[0] * (identified_as_gbis_looks_like_eco4 / gbis_identified.shape[0]) + ) + + total_eco4_expectation = ( + total_eco4_expectation + without_survey_without_ciga_expected + without_survey_passed_ciga_expected + - without_survey_eco4_expected + without_survey_eco4_expected + + without_survey_identified_as_gbis_eco4 ) - total_gbis_expectation = total_gbis_expectation + without_survey_gbis_expected + total_gbis_expectation = ( + total_gbis_expectation + + without_survey_gbis_expected + + without_survey_identified_as_gbis_qualified + ) surveys = loader.data[ha_name]["survey_list"] sold_now = 0 @@ -5829,9 +5886,8 @@ def fml_analysis(loader): "Original ECO4 Estimate - Remaining": original_remaining, "Postcode List - Remaining": postcode_list_remaining, # "Of which sold": sales_since_nov, - "Of which ECO4 Eligible - Remaining": int(total_expectation), + "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation), "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation), - # "Proportion with a survey": proportion_with_survey, } ) From a7ed3b84e560ea3e92517f8568bc7918e352d0e7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 Mar 2024 14:12:49 +0000 Subject: [PATCH 144/248] Added HA8 --- .../ha_15_32/ha_analysis_batch_3.py | 98 ++++++++++++++++++- 1 file changed, 93 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 0ca28927..67139e40 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -517,6 +517,11 @@ class DataLoader: asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA8": + asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA9": asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ @@ -2293,6 +2298,30 @@ class DataLoader: def correct_ha49_survey_list(survey_list): return survey_list + @staticmethod + def correct_ha8_survey_list(survey_list): + # Split on / and take the first half + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0] + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "WESTONIA COURT HOUSE", "Westonia Court" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Hillesdon Avenue", "Hillesden Avenue" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Weston Street", "Western Street" + ) + + # Remove placeholder rows where postcode is missing + survey_list = survey_list[ + ~pd.isnull(survey_list["Post Code"]) + ] + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -5464,7 +5493,7 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): has_bruh = [ - # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", + "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", # Updated get_property_type_and_built_form, still needs running "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", @@ -5561,7 +5590,13 @@ def classify_loft(x): def fml_analysis(loader): assumed_ciga_pass_rate = 0.731 - has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"] + has_bruh = [ + "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", + # Updated get_property_type_and_built_form, still needs running + "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", + # todo + ] no_ciga_cavity_descriptions = [ "Cavity wall, as built, insulated (assumed)", @@ -5597,12 +5632,13 @@ def fml_analysis(loader): results = [] wall_descriptions = [] - for ha_name in has_bruh: + for ha_name in tqdm(has_bruh): original_figures = loader.december_figures[ loader.december_figures["HA Name"] == ha_name ].copy() original_remaining = original_figures["ECO4 remaining"].values[0] + original_gbis_remaining = original_figures["GBIS remaining"].values[0] postcode_list_remaining = remaining_eligible_mapping[ha_name] # Read in the epc data @@ -5669,7 +5705,54 @@ def fml_analysis(loader): raise Exception("SOMETHING WENT WRONG") if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")): - raise Exception("DO THE DAMN ARCHETYPE CHECK BRO") + # We perform the archetype test. If the property is a house, we it needs to be detached, semi-detached + # or end terrace. If it's a bungalow, it must be attached + fuck_this["passes_archetype"] = None + fuck_this["passes_archetype"] = np.where( + (fuck_this["property-type"] == "House") & + (fuck_this["built-form"].isin(["Semi-Detached", "End-Terrace", "Detached"])), + True, + fuck_this["passes_archetype"] + ) + + fuck_this["passes_archetype"] = np.where( + (fuck_this["property-type"] == "Bungalow") & + (fuck_this["built-form"].isin(["Detached"])), + True, + fuck_this["passes_archetype"] + ) + + fuck_this["ECO Eligibility"] = np.where( + (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") & + (fuck_this["passes_archetype"] == True), + "eco4 (subject to ciga)", + fuck_this["ECO Eligibility"] + ) + + # If failed the archetype check and needs a CIGA, it's not eligibile + fuck_this["ECO Eligibility"] = np.where( + (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") & + (fuck_this["passes_archetype"] != True), + "not eligible", + fuck_this["ECO Eligibility"] + ) + + fuck_this["ECO Eligibility"] = np.where( + (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") & + (fuck_this["passes_archetype"] == True), + "eco4", + fuck_this["ECO Eligibility"] + ) + + fuck_this["ECO Eligibility"] = np.where( + (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") & + (fuck_this["passes_archetype"] != True), + "gbis", + fuck_this["ECO Eligibility"] + ) + + if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")): + raise Exception("DO THE DAMN ARCHETYPE CHECK BRO") # clean roof insulation fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0") @@ -5685,6 +5768,9 @@ def fml_analysis(loader): fuck_this["roof_insulation_thickness"] = fuck_this[ "roof_insulation_thickness" ].str.replace("average", "150") + fuck_this["roof_insulation_thickness"] = fuck_this[ + "roof_insulation_thickness" + ].str.replace("above 150", "150") fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1) @@ -5884,6 +5970,7 @@ def fml_analysis(loader): { "HA Name": ha_name, "Original ECO4 Estimate - Remaining": original_remaining, + "Original GGBIS Estimate - Remaining": original_gbis_remaining, "Postcode List - Remaining": postcode_list_remaining, # "Of which sold": sales_since_nov, "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation), @@ -5927,7 +6014,8 @@ def app(): "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", "HA63", "HA107", "HA117", - + # Added as of March 17th + "HA8", # New HAS "HAXX", "HAXXX", ] From 94ad06726320972b02db779b8f2e9440a0ea9c0e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 Mar 2024 14:25:49 +0000 Subject: [PATCH 145/248] done ha11 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 67139e40..920ec1b6 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -530,6 +530,12 @@ class DataLoader: asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA11": + asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Post Code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() elif ha_name == "HA13": asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \ @@ -2322,6 +2328,15 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha11_survey_list(survey_list): + # Remove 39 HOLLYWOOD WAY as it's not in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "HOLLYWOOD WAY") & + (survey_list["NO."] == 39)) + ] + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -6015,7 +6030,7 @@ def app(): "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", "HA63", "HA107", "HA117", # Added as of March 17th - "HA8", + "HA8", "HA11", # New HAS "HAXX", "HAXXX", ] From 9bbcbc881f3f1c50ab8ec422c5b38f04e864e676 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 Mar 2024 14:42:24 +0000 Subject: [PATCH 146/248] Added ha21 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 920ec1b6..e9de4695 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -573,6 +573,12 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA21": + asset_list["matching_address"] = ( + asset_list["Address"].astype(str).str.lower().str.strip() + ", " + + asset_list["PostCode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip() elif ha_name == "HA25": asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] @@ -6030,7 +6036,7 @@ def app(): "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", "HA63", "HA107", "HA117", # Added as of March 17th - "HA8", "HA11", + "HA8", "HA11", "HA21", # New HAS "HAXX", "HAXXX", ] @@ -6038,7 +6044,7 @@ def app(): # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE] # - # Consider for ECO4: + # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess # Consider for GBIS: # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in From 897d58eec2ecc1e51d4a46878918f6c019a2705c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 10:40:12 +0000 Subject: [PATCH 147/248] Added ha44 --- .../ha_15_32/ha_analysis_batch_3.py | 189 +++++++++++++++++- 1 file changed, 178 insertions(+), 11 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e9de4695..dc96d403 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -78,6 +78,29 @@ PROPERTY_TYPE_LOOKUP = { "End Terraced": "End-Terrace", } }, + "HA8": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Bedsit": None, + "Room": None, + "Other": None, + "Commerical": None + }, + "HA11": { + "Flat": "Flat", + "House": "House", + "Semi-Det House": "House", + "Bedsit": None, + "End-Terr House": "House", + "Mid-Terr House": "House", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "End Terr Flat": "Flat", + "Mid Terr Flat": "Flat", + "Detached Flat": "Flat", + }, "HA12": { "House": "House", "Flat": "Flat", @@ -244,6 +267,13 @@ PROPERTY_TYPE_LOOKUP = { "3 Bedroom Unknown": None, "4 Bedroom Unknown": None, }, + "HA37": { + "FLT": "Flat", + "HSE": "House", + "BNW": "Bungalow", + "MAS": "Maisonette", + "HSL": None + }, "HA39": { "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, "1st floor flat": {"property_type": "Flat", "built_form": None}, @@ -291,6 +321,21 @@ PROPERTY_TYPE_LOOKUP = { 'Bungalow 1919-1945': 'Bungalow', 'Office': None }, + "HA42": { + 'Flat': 'Flat', + 'House': 'House', + 'Flat Basement': 'Flat', + 'Room': None, + 'Bedsit Flat': 'Flat', + 'Maisonette': 'Maisonette', + 'Scheme Office': None, + 'Scheme Lounge': None, + 'Bungalow': 'Bungalow', + 'Garage': None, + 'Scheme Sleep Room': None, + 'Cluster': None, + 'Scheme Room': None + }, "HA48": { "House": "House", "Flat": "Flat", @@ -626,6 +671,12 @@ class DataLoader: asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Address Post Code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip() + elif ha_name == "HA37": + asset_list["matching_address"] = asset_list["ADDRESS LINE 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["ADDRESS LINE 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["ADDRESS LINE 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["POSTCODE"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip() elif ha_name == "HA38": asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \ @@ -650,6 +701,18 @@ class DataLoader: asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Postcode"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA42": + asset_list["matching_address"] = asset_list["Dwelling Number"].astype(str).str.lower().str.strip() + " " + \ + asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Locality"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA44": + asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postal Code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postal Code"].astype(str).str.lower().str.strip() elif ha_name == "HA50": asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Post Code"].astype(str).str.lower().str.strip() @@ -1177,6 +1240,66 @@ class DataLoader: asset_list["matching_address"] ) + asset_list["HouseNo"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/FLAT C", + ] + )), + "10C", + asset_list["HouseNo"] + ) + + asset_list["matching_address"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/FLAT C", + ] + )), + "FLAT c, spennymoor, co. durham, dl16 7df, 10c, 10 south view", + asset_list["matching_address"] + ) + + asset_list["HouseNo"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/FLAT D", + ] + )), + "10D", + asset_list["HouseNo"] + ) + + asset_list["matching_address"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/FLAT D", + ] + )), + "FLAT d, spennymoor, co. durham, dl16 7df, 10d, 10 south view", + asset_list["matching_address"] + ) + + asset_list["HouseNo"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/FLAT E", + ] + )), + "10E", + asset_list["HouseNo"] + ) + + asset_list["matching_address"] = np.where( + (asset_list["Address_Line_1"].isin( + [ + "10 SOUTH VIEW/FLAT E", + ] + )), + 'FLAT e, spennymoor, co. durham, dl16 7df, 10e, 10 south view', + asset_list["matching_address"] + ) + return asset_list @staticmethod @@ -1730,6 +1853,13 @@ class DataLoader: survey_list["Street / Block Name"] ) + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == "BEECH ROAD") & + (survey_list["Post Code"] == "DH6 1JD"), + "DH6 1JB", + survey_list["Post Code"] + ) + return survey_list @staticmethod @@ -2343,6 +2473,18 @@ class DataLoader: ] return survey_list + @staticmethod + def correct_ha42_survey_list(survey_list): + # original asset list has nothing in the street + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Turnstone Terrace", "" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Pegasus place", "" + ) + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -2926,7 +3068,7 @@ class DataLoader: "eco4 subject to ciga": "eco4 (subject to ciga)", "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)", "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)", - "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)" + "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)", } ha_facts_and_figures = [] @@ -3189,6 +3331,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA7": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"]) built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"]) + elif ha_name == "HA8": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None elif ha_name == "HA9": property_description = property_meta["Asset Type"].strip().lower() if "house" in property_description: @@ -3204,6 +3349,9 @@ def get_property_type_and_built_form(property_meta, ha_name): return "Maisonette", None return None, None + elif ha_name == "HA11": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None elif ha_name == "HA12": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip()) built_form = None @@ -3237,6 +3385,21 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA19": property_type = property_meta["Dwelling Type"] built_form = None + elif ha_name == "HA21": + property_description = property_meta["Property Type"].strip().lower() + if "house" in property_description: + return "House", None + + if "flat" in property_description: + return "Flat", None + + if "bungalow" in property_description: + return "Bungalow", None + + if "maisonette" in property_description: + return "Maisonette", None + + return None, None elif ha_name == "HA24": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None @@ -3277,6 +3440,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA35": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip()) built_form = None + elif ha_name == "HA37": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PROPERTY TYPE"].strip()) + built_form = None elif ha_name == "HA39": property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {}) property_type = property_type_config.get("property_type", None) @@ -3291,6 +3457,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA41": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip()) built_form = None + elif ha_name == "HA42": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip()) + built_form = None elif ha_name == "HA48": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None @@ -5515,10 +5684,9 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): has_bruh = [ "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", - # Updated get_property_type_and_built_form, still needs running "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - # todo + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', ] # Can't pull from EPC database because it's based in Scotland @@ -5613,10 +5781,9 @@ def fml_analysis(loader): assumed_ciga_pass_rate = 0.731 has_bruh = [ "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", - # Updated get_property_type_and_built_form, still needs running "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - # todo + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', ] no_ciga_cavity_descriptions = [ @@ -5639,7 +5806,7 @@ def fml_analysis(loader): "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7", "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", - "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX" + "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX", ] values = [ @@ -5660,7 +5827,6 @@ def fml_analysis(loader): ].copy() original_remaining = original_figures["ECO4 remaining"].values[0] original_gbis_remaining = original_figures["GBIS remaining"].values[0] - postcode_list_remaining = remaining_eligible_mapping[ha_name] # Read in the epc data asset_list = loader.data[ha_name]["asset_list"].copy() @@ -5992,10 +6158,10 @@ def fml_analysis(loader): "HA Name": ha_name, "Original ECO4 Estimate - Remaining": original_remaining, "Original GGBIS Estimate - Remaining": original_gbis_remaining, - "Postcode List - Remaining": postcode_list_remaining, + # "Postcode List - Remaining": postcode_list_remaining, # "Of which sold": sales_since_nov, - "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation), - "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation), + "EPC verified ECO4 Eligible - Remaining": int(total_eco4_expectation), + "EPC verified GBIS Eligibile - Remaining": int(total_gbis_expectation), } ) @@ -6036,7 +6202,8 @@ def app(): "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", "HA63", "HA107", "HA117", # Added as of March 17th - "HA8", "HA11", "HA21", + "HA8", "HA11", "HA21", "HA37", "HA42", + "HA44", # New HAS "HAXX", "HAXXX", ] From c58acadb730b6e6ab1ebb700b4669ab3cf171f5b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 12:19:15 +0000 Subject: [PATCH 148/248] HA51 eco3 matching --- .../ha_15_32/ha_analysis_batch_3.py | 80 ++++++++++++++++--- 1 file changed, 71 insertions(+), 9 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index dc96d403..af9af514 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -491,6 +491,10 @@ class DataLoader: "address": "A_Address", "postcode": "matching_postcode" }, + "HA45": { + "address": "Full postal address", + "postcode": "Postcode" + }, "HA48": { "address": "Full Address", "postcode": "Postcode" @@ -518,7 +522,8 @@ class DataLoader: "HA50": 4, "HA63": 15, "HA107": 51, - "HA48": 0 + "HA48": 0, + "HA45": 0 } UNMATCHED_ECO3 = { @@ -527,7 +532,8 @@ class DataLoader: "HA50": 5, "HA56": 320, "HA63": 0, - "HA117": 4 + "HA117": 4, + "HA51": 24 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -542,7 +548,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]: + if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -717,6 +723,18 @@ class DataLoader: asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Post Code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() + elif ha_name == "HA51": + asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Postcode"].astype(str).str.lower().str.strip() + asset_list["matching_address"] = np.where( + asset_list["Block"].str.strip().str.len() > 0, + asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["matching_address"], + asset_list["matching_address"] + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA56": asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \ @@ -2485,6 +2503,13 @@ class DataLoader: ) return survey_list + @staticmethod + def correct_ha45_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Norwich Road", "Norwich Avenue" + ) + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -2744,6 +2769,38 @@ class DataLoader: return eco3_list + @staticmethod + def correct_ha51_eco3_list(eco3_list): + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "HASELEMERE AVENUE", "HASLEMERE AVENUE" + ) + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "THORVILLE GROVE", "THORNVILLE GROVE" + ) + eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace( + "MONTBRETA CLOSE", "MONTBRETIA CLOSE" + ) + eco3_list["Post Code"] = np.where( + (eco3_list["Street / Block Name"] == "SYDENHAM ROAD") & + (eco3_list["Post Code"] == "CR0 2DW"), + "CR0 2ED", + eco3_list["Post Code"] + ) + # Not in asset list + eco3_list = eco3_list[ + ~((eco3_list["Street / Block Name"] == "WOODLEY LANE") & + (eco3_list["Post Code"] == "SM5 2RJ") & + (eco3_list["NO "] == "FLAT 3, 11")) + ] + + eco3_list["NO "] = np.where( + (eco3_list["NO "] == "47 B"), + "47B", + eco3_list["NO "] + ) + + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -2752,7 +2809,7 @@ class DataLoader: asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower() eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "") - if ha_name in ["HA25", "HA56"]: + if ha_name in ["HA25", "HA56", "HA51"]: # HA25: 317 -> 259 missed_postcodes = { postcode for postcode in eco3_list["postcode_no_space"] if @@ -2774,7 +2831,7 @@ class DataLoader: matching_lookup = [] missed = [] for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)): - # if row["eco3_list_row_id"] == "HA25_Eco3_5422": + # if row["eco3_list_row_id"] == "HA51_Eco3_22": # raise Exception() postcode = row["postcode_no_space"] @@ -2813,6 +2870,12 @@ class DataLoader: missed.append(row["eco3_list_row_id"]) continue + if df.shape[0] > 1: + if "flat" in str(row["NO "]).lower(): + df = df[df["matching_address"].str.contains("flat")] + else: + df = df[~df["matching_address"].str.contains("flat")] + if df.shape[0] != 1: print(row["Street / Block Name"]) print(house_number) @@ -6200,10 +6263,9 @@ def app(): priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", - "HA63", "HA107", "HA117", - # Added as of March 17th - "HA8", "HA11", "HA21", "HA37", "HA42", - "HA44", + "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", + # Added as of March 18th + "HA44", "HA45", "HA51", # New HAS "HAXX", "HAXXX", ] From e7cd80eba0ef8f11c62506509b5a7d60c7a37ce7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 12:34:28 +0000 Subject: [PATCH 149/248] Added HA52 --- .../ha_15_32/ha_analysis_batch_3.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index af9af514..056a4190 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -946,6 +946,17 @@ class DataLoader: else: return "ECO surveys" + @staticmethod + def correct_ha51_asset_list(asset_list): + # Correct this + asset_list["HouseNo"] = np.where( + asset_list["matching_address"].str.contains("61 wandle bank"), + asset_list["Block"].str.lower(), + asset_list["HouseNo"] + ) + + return asset_list + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) asset_sheetname = self.get_asset_sheetname(workbook) @@ -2510,6 +2521,16 @@ class DataLoader: ) return survey_list + @staticmethod + def correct_ha51_survey_list(survey_list): + survey_list = survey_list.rename(columns={"NO ": "NO."}) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Autum Close", "Autumn Close" + ) + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() From e6c9dd7074dfba12668b31651ec1a5d9eab6a27c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 12:55:37 +0000 Subject: [PATCH 150/248] Done HA52 --- .../ha_15_32/ha_analysis_batch_3.py | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 056a4190..bdf15917 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -503,6 +503,10 @@ class DataLoader: "address": "Property Address Full", "postcode": "Property Postcode" }, + "HA52": { + "address": "Postal Address", + "postcode": "POSTCODE" + }, "HA54": { "address": "Postal Address", "postcode": "matching_postcode" @@ -523,7 +527,8 @@ class DataLoader: "HA63": 15, "HA107": 51, "HA48": 0, - "HA45": 0 + "HA45": 0, + "HA52": 5 } UNMATCHED_ECO3 = { @@ -548,7 +553,7 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]: + if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -2531,6 +2536,25 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha52_survey_list(survey_list): + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Mardalle Avenue", "Mardale Avenue" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Ollerton Close, Grappenhall", "Ollerton Close" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Bradshaw Road, Grappenhall", "Bradshaw Lane" + ) + + # Drop a bunch of dupes + survey_list = survey_list.drop_duplicates(["NO.", "Street / Block Name", "Post Code"]) + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -3165,7 +3189,12 @@ class DataLoader: asset_list_starting_size = asset_list.shape[0] # Change the column name if it's ECO eligibility - asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"}) + asset_list = asset_list.rename( + columns={ + "ECO eligibility": "ECO Eligibility", + "ECO Eligibilty": "ECO Eligibility", + }, + ) # Remove surplus whitespace from the ECO Eligibility column asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip() # Push to lower case @@ -6286,7 +6315,7 @@ def app(): "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", # Added as of March 18th - "HA44", "HA45", "HA51", + "HA44", "HA45", "HA51", "HA52", # New HAS "HAXX", "HAXXX", ] From 92193d773dbd72aca67da82870d3f7da5a4acfe7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 13:21:57 +0000 Subject: [PATCH 151/248] fix facts and figures bug for ha51 --- .../ha_15_32/ha_analysis_batch_3.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bdf15917..e40bb98b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3305,11 +3305,18 @@ class DataLoader: ) else: # We have some examples, e.g. HA28, where we do not have the installed or cancelled column - survey_list["installation_status"] = np.where( - survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"), - "cancelled", - "installed", - ) + if 'INSTALL/ CANCELLATION DATE' in survey_list.columns: + survey_list["installation_status"] = np.where( + survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"), + "cancelled", + "installed", + ) + else: + survey_list["installation_status"] = np.where( + survey_list['INSTALL / CANCELLATION DATE'].str.lower().str.contains("cancelled"), + "cancelled", + "installed", + ) # Finally, for other cases, we set the status to "in progress" survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress") @@ -5800,6 +5807,8 @@ def fml_data_pull(loader): "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', + # NEW - add property type + 'HA44', 'HA45', 'HA51', 'HA52' ] # Can't pull from EPC database because it's based in Scotland From 443aa585d0c3c35ae34718f0e8338ec48ba7ad3c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 15:40:52 +0000 Subject: [PATCH 152/248] Adding ha5 --- .../ha_15_32/ha_analysis_batch_3.py | 181 +++++++++++++++++- 1 file changed, 171 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e40bb98b..009064c6 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -336,6 +336,16 @@ PROPERTY_TYPE_LOOKUP = { 'Cluster': None, 'Scheme Room': None }, + "HA45": { + 'Large block of flats': 'Flat', + 'Small block of flats/dwelling converted in to flats': 'Flat', + 'Semi-detached house': 'House', + 'Mid-terraced house': 'House', + 'End-terraced house': 'House', + 'Block of flats': 'Flat', + 'Detached house': 'House', + 'Flat in mixed use building': 'Flat', + }, "HA48": { "House": "House", "Flat": "Flat", @@ -364,6 +374,30 @@ PROPERTY_TYPE_LOOKUP = { 'Flat?': 'Flat', 'Bungalow ': 'Bungalow' }, + "HA51": { + 'FLAT': 'Flat', + 'HOUSE': 'House', + 'MAISONETTE': 'Maisonette', + 'BEDSIT': None, # Considering as a non-specific residential category here + 'BUNGALOW': 'Bungalow', + }, + "HA52": { + 'House - Mid Terrace': 'House', + 'Flat - First Floor': 'Flat', + 'Flat - Ground Floor': 'Flat', + 'House - Semi-Detached': 'House', + 'House - End Terrace': 'House', + 'Flat - Second Floor': 'Flat', + 'Bedsit': None, # Considering as a non-specific residential category here + 'Bungalow - Semi-Detached': 'Bungalow', + 'Bungalow - Mid Terrace': 'Bungalow', + 'Bungalow - End Terrace': 'Bungalow', + 'House - Detached': 'House', + 'Flat - Third Floor': 'Flat', + 'House attached to flats': 'House', + 'Flat - Fourth Floor': 'Flat', + 'Bungalow - Detached': 'Bungalow' + }, "HA56": { 'House Non Specific': 'House', 'HOUSE TERRACED': 'House', @@ -463,6 +497,10 @@ class DataLoader: "address": "Address", "postcode": "Address - Postcode" }, + "HA5": { + "address": "Address", + "postcode": "matching_postcode" + }, "HA6": { "address": "propertyaddress", "postcode": "address" # The 'address' column actually contains postcode @@ -553,7 +591,9 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]: + if ha_name in [ + "HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54" + ]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -750,6 +790,10 @@ class DataLoader: asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["POSTCODE"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip() + elif ha_name == "HA70": + asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["POSTCODE"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip() elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ @@ -962,9 +1006,100 @@ class DataLoader: return asset_list + def prepare_ha17(self, workbook): + blocks_sheet = workbook["Blocks List - Cavity Wall only"] + blocks_data = [] + blocks_colnames = [cell.value for cell in blocks_sheet[2]] + for row in blocks_sheet.iter_rows(min_row=4, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + blocks_data.append(row_data) + + blocks_df = pd.DataFrame(blocks_data, columns=blocks_colnames) + + blocks_df["matching_address"] = ( + blocks_df["Block Name\n[as per Naming Convention procedure]"].astype(str).str.lower().str.strip() + ", " + + blocks_df["Block Street Name"].astype(str).str.lower().str.strip() + ", " + + blocks_df["Postcode"].astype(str).str.lower().str.strip() + ) + blocks_df["matching_postcode"] = blocks_df["Postcode"].astype(str).str.lower().str.strip() + blocks_df["property_type"] = "Flat" + + street_properties_sheet = workbook["Street Properties - Cavity Wall"] + street_properties_data = [] + street_properties_colnames = [cell.value for cell in street_properties_sheet[2]] + for row in street_properties_sheet.iter_rows(min_row=3, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + street_properties_data.append(row_data) + + street_properties_df = pd.DataFrame(street_properties_data, columns=street_properties_colnames) + + street_properties_df["matching_address"] = ( + street_properties_df["Block Name\n[as per Naming Convention procedure]"].astype( + str).str.lower().str.strip() + ", " + + street_properties_df["Postcode"].astype(str).str.lower().str.strip() + ) + street_properties_df["matching_postcode"] = street_properties_df["Postcode"].astype(str).str.lower().str.strip() + street_properties_df["property_type"] = street_properties_df[ + "Block typology based on dwelling type\n[defined list]" + ] + + asset_list_compressed = pd.concat( + [ + blocks_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]], + street_properties_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]] + ], + axis=0 + ) + # We expand + range_pattern = r"(\d+)\s+to\s+(\d+)\s+(.*)" + asset_list = [] + for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)): + if row["ECO Eligibility"] == "Not Eligible": + asset_list.append(row.to_dict()) + continue + + # Detect a house number range + match = re.search(range_pattern, row["matching_address"]) + + if not match: + asset_list.append(row.to_dict()) + continue + + # Extracting the start and end of the range + start_number = int(match.group(1)) + end_number = int(match.group(2)) + rest_of_address = match.group(3) + + # Generating the list of house numbers + house_numbers = list(range(start_number, end_number + 1)) + data_to_extend = [] + for house_number in house_numbers: + new_adress = f"{house_number} {rest_of_address}" + + entry = row.to_dict().copy() + entry.update({"matching_address": new_adress}) + + data_to_extend.append(entry) + + asset_list.extend(data_to_extend) + + asset_list = pd.DataFrame(asset_list) + + # Add in asset_list_row_id + asset_list["asset_list_row_id"] = ["HA17" + str(i) for i in range(0, len(asset_list))] + + # Add on house number + asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list) + + return asset_list + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) - asset_sheetname = self.get_asset_sheetname(workbook) + if ha_name == "HA17": + asset_list = self.prepare_ha17(workbook) + return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + else: + asset_sheetname = self.get_asset_sheetname(workbook) asset_sheet = workbook[asset_sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] @@ -977,6 +1112,9 @@ class DataLoader: if ha_name == "HA54": asset_sheet_colnames[10] = "matching_postcode" + if ha_name == "HA5": + asset_sheet_colnames[2] = "matching_postcode" + rows_data = [] for row in asset_sheet.iter_rows(min_row=2, values_only=False): @@ -2555,6 +2693,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha5_survey_list(survey_list): + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -3431,6 +3573,9 @@ class DataLoader: def get_property_type_and_built_form(property_meta, ha_name): + if ha_name in ["HA44"]: + return None, None + if ha_name == "HA1": property_type = property_meta["Asset Type"] # We correct a small error @@ -3499,6 +3644,8 @@ def get_property_type_and_built_form(property_meta, ha_name): config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] property_type = config.get("property-type") built_form = config.get("built-form") + elif ha_name == "HA17": + return property_meta["property_type"], None elif ha_name == "HA18": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) built_form = None @@ -3580,6 +3727,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA42": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip()) built_form = None + elif ha_name == "HA45": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property type"].strip()) + built_form = None elif ha_name == "HA48": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None @@ -3589,6 +3739,14 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA50": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None + elif ha_name == "HA51": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None + elif ha_name == "HA52": + if property_meta["Property Type"] is None: + return None, None + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None elif ha_name == "HA54": property_type = property_meta["Property Type"] built_form = None @@ -5806,9 +5964,9 @@ def fml_data_pull(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', # NEW - add property type - 'HA44', 'HA45', 'HA51', 'HA52' + "HA17" ] # Can't pull from EPC database because it's based in Scotland @@ -5905,7 +6063,7 @@ def fml_analysis(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52' ] no_ciga_cavity_descriptions = [ @@ -6320,11 +6478,11 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", - "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", - "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", + "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", + "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", + "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", # Added as of March 18th - "HA44", "HA45", "HA51", "HA52", + "HA44", "HA45", "HA51", "HA52", "HA17", # New HAS "HAXX", "HAXXX", ] @@ -6332,7 +6490,10 @@ def app(): # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE] # - # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess + # Consider for ECO4: + # HA 70 - have to merge ECO3 list though, + # HA17 has LOTs of assets, but the asset list is a mess + # HA53 but has EPCs done # Consider for GBIS: # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in From 6ccfff0411ee2af58d6f7dc47b98f2deb70eac5c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 16:14:11 +0000 Subject: [PATCH 153/248] Added ha20 --- .../ha_15_32/ha_analysis_batch_3.py | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 009064c6..627fcede 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -566,7 +566,8 @@ class DataLoader: "HA107": 51, "HA48": 0, "HA45": 0, - "HA52": 5 + "HA52": 5, + "HA20": 6 } UNMATCHED_ECO3 = { @@ -669,6 +670,17 @@ class DataLoader: asset_list["Postcode"].astype(str).str.lower().str.strip() ) asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() + elif ha_name == "HA20": + asset_list["matching_address"] = ( + asset_list["House Name"].astype(str).str.lower().str.strip() + ", " + + asset_list["Block"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + + asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + + asset_list["Postcode"].astype(str).str.lower().str.strip() + ) + asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip() elif ha_name == "HA21": asset_list["matching_address"] = ( asset_list["Address"].astype(str).str.lower().str.strip() + ", " + @@ -2697,6 +2709,35 @@ class DataLoader: def correct_ha5_survey_list(survey_list): return survey_list + @staticmethod + def correct_ha20_survey_list(survey_list): + # Not in the asset list + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Abbot Close", "ABBOTS CLOSE" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Downbarns Road", "DOWN BARNS ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Austin Lane", "AUSTINS LANE" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "South Park Way", "SOUTHPARK WAY" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "OAKLAND ROAD", "OAKWOOD ROAD" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ACRE WAY/NORTHWOOD", "ACRE WAY" + ) + + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -3301,7 +3342,8 @@ class DataLoader: "AFF0RDALE WARMTH": "ECO4", "ECO 4 RdSAP CL": "ECO4", "Affordable Warmth (R) ": "ECO4", - "Affordable Warmth ": "ECO4" + "Affordable Warmth ": "ECO4", + "ECO 4 AFFORDABLE WARMTH": "ECO4", } # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we @@ -6478,11 +6520,11 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", + "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", # Added as of March 18th - "HA44", "HA45", "HA51", "HA52", "HA17", + "HA44", "HA45", "HA51", "HA52", "HA17", "HA5", "HA20", # New HAS "HAXX", "HAXXX", ] From 3dd30445f92635df45b5da2a756650ca116f3855 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Mar 2024 19:37:11 +0000 Subject: [PATCH 154/248] HA Analysis finalised --- .../ha_15_32/ha_analysis_batch_3.py | 257 +++++++++++++++--- 1 file changed, 225 insertions(+), 32 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 627fcede..2f17ed73 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -51,6 +51,12 @@ PROPERTY_TYPE_LOOKUP = { 'MAISONETTE': "Maisonette", 'HOSTEL': None }, + "HA5": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Bedsit": None + }, "HA6": { "property_type": { 'HOUSE': "House", @@ -161,6 +167,21 @@ PROPERTY_TYPE_LOOKUP = { "Hostel": None, "Block": None, }, + "HA20": { + "House": "House", + "Flat": "Flat", + 'Sheltered Flat': "Flat", + 'Maisonette': 'Maisonette', + 'Bungalow': 'Bungalow', + 'House. SD': 'House', + 'House. MT': 'House', + 'House. ET': 'House', + 'Sheltered Bungalow': 'Bungalow', + 'Guest Accomodation': None, + 'Sheltered House': 'House', + 'House. MT ': 'House', + 'House. D': 'House' + }, "HA24": { '01 HOUSE': 'House', '02 FLAT': 'Flat', @@ -3632,6 +3653,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA2": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip()) built_form = None + elif ha_name == "HA5": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None elif ha_name == "HA6": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] built_form = property_meta["built_form"] @@ -3694,6 +3718,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA19": property_type = property_meta["Dwelling Type"] built_form = None + elif ha_name == "HA20": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None elif ha_name == "HA21": property_description = property_meta["Property Type"].strip().lower() if "house" in property_description: @@ -5775,6 +5802,7 @@ def forecast_remaining_sales(loader): results.append(to_append) results = pd.DataFrame(results) + results.to_csv("pipeline_remaining_raw.csv") totals_row = {} for col in results.columns: @@ -6006,9 +6034,7 @@ def fml_data_pull(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', - # NEW - add property type - "HA17" + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] # Can't pull from EPC database because it's based in Scotland @@ -6105,7 +6131,7 @@ def fml_analysis(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52' + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] no_ciga_cavity_descriptions = [ @@ -6124,22 +6150,6 @@ def fml_analysis(loader): # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass # them! Non-invasices will have checked the wall though - codes = [ - "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7", - "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12", - "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", - "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX", - ] - - values = [ - 706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135, - 1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204, - 281, 422, 74, 313, 71, 6 - ] - - # Create a dictionary mapping - remaining_eligible_mapping = dict(zip(codes, values)) - results = [] wall_descriptions = [] for ha_name in tqdm(has_bruh): @@ -6397,9 +6407,13 @@ def fml_analysis(loader): without_survey_without_ciga_expected = 0 else: # We apply the same conversion rate as the properties with a survey - without_survey_without_ciga_expected = np.round( - without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) - ) + + if ciga_check_needed.shape[0] == 0 and ciga_check_expectation == 0: + without_survey_without_ciga_expected = without_survey_needing_ciga.shape[0] + else: + without_survey_without_ciga_expected = np.round( + without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) + ) without_survey_passed_ciga = fuck_this[ (fuck_this["estimated"] == True) & @@ -6466,15 +6480,6 @@ def fml_analysis(loader): without_survey_identified_as_gbis_qualified ) - surveys = loader.data[ha_name]["survey_list"] - sold_now = 0 - if not surveys.empty: - sold_now = surveys[ - surveys["installation_status"].str.lower().str.contains("eco4") - ].shape[0] - - sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0] - results.append( { "HA Name": ha_name, @@ -6498,6 +6503,194 @@ def fml_analysis(loader): # TODO: Change the left hand side number for our post CIGA estimates +def create_final_report(): + """ + This function will produce the final output for the HA analysis + :return: + """ + epc_validated_results = pd.read_csv("analysis - revised.csv") + pipeline_results = pd.read_csv("pipeline_remaining_raw.csv") + + #################################### + # Original Warmfront estimates + #################################### + # Create the volumes result + all_ha_summary_remaining = pipeline_results[ + [ + "('', '', '', 'HA Name')", + "('ECO4 original', '', 'Remaining - #', '')", + "('GBIS original', '', 'Remaining - #', '')", + ] + ].copy().rename( + columns={ + "('', '', '', 'HA Name')": "HA Name", + "('ECO4 original', '', 'Remaining - #', '')": "# ECO4 remaining - All HA Summary", + "('GBIS original', '', 'Remaining - #', '')": "# GBIS remaining - All HA Summary", + } + ) + all_ha_summary_remaining["# Total remaining - All HA Summary"] = ( + all_ha_summary_remaining["# ECO4 remaining - All HA Summary"] + + all_ha_summary_remaining["# GBIS remaining - All HA Summary"] + ) + all_ha_summary_remaining = all_ha_summary_remaining.sort_values("HA Name") + + #################################### + # Postcode list - pre-CIGA + #################################### + postcode_list_pre_ciga_remaining = pipeline_results[ + [ + "('', '', '', 'HA Name')", + "('ECO4 pre-ciga', '', 'Remaining - #', '')", + "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')", + ] + ].copy().rename( + columns={ + "('', '', '', 'HA Name')": "HA Name", + "('ECO4 pre-ciga', '', 'Remaining - #', '')": "# ECO4 remaining - Postcode list (pre CIGA)", + "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": ( + "# GBIS remaining - Postcode list (pre CIGA)" + ), + } + ) + + postcode_list_pre_ciga_remaining["# Total remaining - Postcode list (pre CIGA)"] = ( + postcode_list_pre_ciga_remaining["# ECO4 remaining - Postcode list (pre CIGA)"] + + postcode_list_pre_ciga_remaining["# GBIS remaining - Postcode list (pre CIGA)"] + ) + postcode_list_pre_ciga_remaining = postcode_list_pre_ciga_remaining.sort_values("HA Name") + + #################################### + # Postcode list - post-CIGA + #################################### + postcode_list_post_ciga_remaining = pipeline_results[ + [ + "('', '', '', 'HA Name')", + "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')", + "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')", + ] + ].copy().rename( + columns={ + "('', '', '', 'HA Name')": "HA Name", + "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')": + "# ECO4 remaining - Postcode list (post CIGA)", + "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": ( + "# GBIS remaining - Postcode list (post CIGA)" + ), + } + ) + + postcode_list_post_ciga_remaining["# Total remaining - Postcode list (post CIGA)"] = ( + postcode_list_post_ciga_remaining["# ECO4 remaining - Postcode list (post CIGA)"] + + postcode_list_post_ciga_remaining["# GBIS remaining - Postcode list (post CIGA)"] + ) + postcode_list_post_ciga_remaining = postcode_list_post_ciga_remaining.sort_values("HA Name") + + #################################### + # From EPC Database + #################################### + from_epc_database = epc_validated_results[ + [ + "HA Name", + "EPC verified ECO4 Eligible - Remaining", + "EPC verified GBIS Eligibile - Remaining" + ] + ].copy().rename( + columns={ + "EPC verified ECO4 Eligible - Remaining": "# ECO4 remaining - From EPC Database (post CIGA)", + "EPC verified GBIS Eligibile - Remaining": "# GBIS remaining - From EPC Database (post CIGA)", + } + ) + + from_epc_database["# Total remaining - From EPC Database (post CIGA)"] = ( + from_epc_database["# ECO4 remaining - From EPC Database (post CIGA)"] + + from_epc_database["# GBIS remaining - From EPC Database (post CIGA)"] + ) + from_epc_database = from_epc_database.sort_values("HA Name") + + # Combine the datasets + volumes = all_ha_summary_remaining.merge( + postcode_list_pre_ciga_remaining, how="left", on="HA Name" + ).merge( + postcode_list_post_ciga_remaining, how="left", on="HA Name" + ).merge( + from_epc_database, how="inner", on="HA Name" + ) + + revenue = volumes.copy() + # Convert the ECO4 volumes to revenue + for col in [ + '# ECO4 remaining - All HA Summary', + '# ECO4 remaining - Postcode list (pre CIGA)', + '# ECO4 remaining - Postcode list (post CIGA)', + '# ECO4 remaining - From EPC Database (post CIGA)' + ]: + revenue[col] = revenue[col] * 1710 + + # Convert the GBIS volumes to revenue + for col in [ + '# GBIS remaining - All HA Summary', + '# GBIS remaining - Postcode list (pre CIGA)', + '# GBIS remaining - Postcode list (post CIGA)', + '# GBIS remaining - From EPC Database (post CIGA)' + ]: + revenue[col] = revenue[col] * 600 + + # Re-calculate the totals + revenue['# Total remaining - All HA Summary'] = ( + revenue['# ECO4 remaining - All HA Summary'] + revenue['# GBIS remaining - All HA Summary'] + ) + + revenue['# Total remaining - Postcode list (pre CIGA)'] = ( + revenue['# ECO4 remaining - Postcode list (pre CIGA)'] + revenue['# GBIS remaining - Postcode list (pre CIGA)'] + ) + + revenue['# Total remaining - Postcode list (post CIGA)'] = ( + revenue['# ECO4 remaining - Postcode list (post CIGA)'] + revenue[ + '# GBIS remaining - Postcode list (post CIGA)'] + ) + + revenue['# Total remaining - From EPC Database (post CIGA)'] = ( + revenue['# ECO4 remaining - From EPC Database (post CIGA)'] + + revenue['# GBIS remaining - From EPC Database (post CIGA)'] + ) + + # Replace the # with £ in the columns + revnue_colnames = [col.replace("#", "£") for col in revenue.columns] + revenue.columns = revnue_colnames + + # We check that each column gets smaller + decreasing_check1 = all( + volumes["# ECO4 remaining - Postcode list (pre CIGA)"] >= volumes[ + '# ECO4 remaining - Postcode list (post CIGA)'] + ) + if not decreasing_check1: + raise ValueError("decreasing_check1 failed") + + # Just HA32 and HA17 should fail this, and it's due to GBIS jobs looking like ECO4 + decreasing_check2 = volumes[volumes["# ECO4 remaining - From EPC Database (post CIGA)"] > volumes[ + "# ECO4 remaining - Postcode list (post CIGA)"]] + + if set(decreasing_check2["HA Name"].tolist()) != {"HA17", "HA32"}: + raise ValueError("decreasing_check2 failed") + + # Check for GBIS + decreasing_check3 = all( + volumes["# GBIS remaining - Postcode list (pre CIGA)"] >= volumes[ + '# GBIS remaining - Postcode list (post CIGA)'] + ) + + if not decreasing_check3: + raise ValueError("decreasing_check3 failed") + + # Don't perform this - this happens for multiple + # decreasing_check4 = volumes[volumes["# GBIS remaining - From EPC Database (post CIGA)"] > volumes[ + # "# GBIS remaining - Postcode list (post CIGA)"]] + + # Store final outputs + volumes.to_csv("HA Analysis Final - volumes.csv") + revenue.to_csv("HA Analysis Final - revenue.csv") + + def app(): """ This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. From 724379a86d1bd9b79159f2f8f9e5d8abe9496f5f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 26 Mar 2024 18:05:08 +0000 Subject: [PATCH 155/248] wrapping up ha analysis --- .../ha_15_32/ha_analysis_batch_3.py | 170 ++++++++++-------- 1 file changed, 94 insertions(+), 76 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 2f17ed73..e414cd00 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -5366,6 +5366,7 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): + # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] if original_warmfront_estimates.empty: @@ -6032,7 +6033,7 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): has_bruh = [ "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", - "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] @@ -6129,7 +6130,7 @@ def fml_analysis(loader): assumed_ciga_pass_rate = 0.731 has_bruh = [ "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", - "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] @@ -6738,89 +6739,106 @@ def app(): loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs) loader.load() loader.ha_facts_and_figures() - forecast_remaining_sales(loader) - conversion_rate = 0.95 - archetype_check_conversion = 0.7 - res = [] - for k, v in loader.data.items(): - asset_list = v["asset_list"].copy() - agg = asset_list["ECO Eligibility"].value_counts() - # We find a case where there are properties that have passed CIGA - if not any("passed" in x for x in agg.index): + # gbis rate + # breakdowns = [] + # for ha, data_assets in loader.data.items(): + # asset_list = data_assets["asset_list"].copy() + # breakdown = asset_list["ECO Eligibility"].value_counts().to_dict() + # breakdowns.append(breakdown) + # breakdowns = pd.DataFrame(breakdowns) + # + # installer = [] + # for ha, data_assets in loader.data.items(): + # survey_list = data_assets["survey_list"] + # if survey_list.empty: + # continue + # if "INSTALLER" not in survey_list.columns: + # continue + # + # installers = survey_list["INSTALLER"].value_counts().to_dict() + # installers["ha_name"] = ha + # installer.append(installers) + # installer = pd.DataFrame(installer) + # installer.drop(columns=["ha_name"]).sum().sum() + + # Adhoc - for HA16, get the properties that still need a CIGA check + asset_list_ha16 = loader.data["HA16"]["asset_list"].copy() + ha_16_need_ciga = asset_list_ha16[ + asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga") + ] + completed_cigas = loader.data["HA16"]["ciga_list"].copy() + # Store the results + ha_16_need_ciga.to_csv("ha16_need_ciga.csv") + completed_cigas.to_csv("ha16_completed_cigas.csv") + + # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for + # live projects + + # Read excel + orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx" + orderbook_workbook = openpyxl.load_workbook(orderbook_filepath) + orderbook_sheet = orderbook_workbook["Contractual Info"] + orderbook_colnames = [cell.value for cell in orderbook_sheet[1]] + + rows = [] + for row in orderbook_sheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + rows.append(row_data) + + orderbook = pd.DataFrame(rows, columns=orderbook_colnames) + live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy() + live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "") + + dormant_properties = [] + missed_has = [] + for _, customer in live_orderbook.iterrows(): + if customer['Redacted HA'] not in loader.data.keys(): + missed_has.append(customer['Redacted HA']) continue + asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy() + survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy() + # Remove sold + if not survey_list.empty: + survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])] + asset_list = asset_list.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + # Anything that has an installation has gone to installation, and therefore is not remaining + asset_list = asset_list[pd.isnull(asset_list["installation_status"])] + asset_list = asset_list.drop(columns=["installation_status"]) - agg = pd.DataFrame(agg).reset_index() - - passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"] - passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0 - - failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"] - failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0 - - ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1 - - dormant_ciga = agg[ - agg["ECO Eligibility"].str.contains("subject to ciga") & - ~agg["ECO Eligibility"].str.contains("subject to archetype") + # We pull out the properties that need a CIGA check + need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"] + need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"] + need_ciga_and_archetype = asset_list[ + asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)" ] - dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0 - - dormant_ciga_archetype = agg[ - agg["ECO Eligibility"].str.contains("subject to ciga") & - agg["ECO Eligibility"].str.contains("subject to archetype") - ] - - dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0 - - needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion - needing_check = np.round(needing_check) - - additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + ( - dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate - ) - additional_jobs = np.round(additional_jobs) - - # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs - original_estimate = loader.december_figures[ - loader.december_figures["HA Name"] == k - ] - - original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0 - base_eco_figures = agg[ - agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"]) - ]["count"].sum() - eco4_from_ciga = original_estimate - base_eco_figures - eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0 - surplus_from_dormant = additional_jobs - eco4_from_ciga - surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant - - res.append( + dormant_properties.append( { - "ha_name": k, - "additional_eco4": additional_jobs, - "needing_check": needing_check, - "surplus_from_dormant": surplus_from_dormant + "HA Name": customer['Redacted HA'], + "Need CIGA": need_ciga.shape[0], + "Need Archetype": need_archetype.shape[0], + "Need CIGA and Archetype": need_ciga_and_archetype.shape[0] } ) - res = pd.DataFrame(res) - # Drop the HAs that are not in that pervious draft - # In the v2 draft, there are 12 HAs + dormant_properties = pd.DataFrame(dormant_properties) + totals = dormant_properties.sum() + totals["HA Name"] = "Total" - v5_surplus = res[ - ~res["ha_name"].isin(["HA9"]) - ]["additional_eco4"].sum() - # 7212 properties - # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November - # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255, - # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties - # pre-CIGA + dormant_properties = pd.concat([dormant_properties, totals.to_frame().T]) + dormant_properties.to_csv("dormant_properties.csv") - v5_surplus_from_dormant = res[ - ~res["ha_name"].isin(["HA9"]) - ]["surplus_from_dormant"].sum() - # 5539.0 - # 9471690 + loader.december_figures["ECO4 remaining"].sum() + december_figures = loader.december_figures.copy() + december_figures["ECO4 remaining"] = np.where( + december_figures["ECO4 remaining"] < 0, + 0, + december_figures["ECO4 remaining"] + ) + december_figures["ECO4 remaining"].sum() From ebb28236617abff1e3a5f91dd6b06b66a001a4d7 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Wed, 27 Mar 2024 11:39:51 +0000 Subject: [PATCH 156/248] override scenerio data to have average insulation thickness, change impact values --- etl/epc/generate_scenarios_data.py | 48 +++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py index d5bece8b..f9f66034 100644 --- a/etl/epc/generate_scenarios_data.py +++ b/etl/epc/generate_scenarios_data.py @@ -54,9 +54,19 @@ scenario_properties = [ "postcode": "NN1 5JY", "lmk-key": "1459796789102016070507274146560098", "measures": [ - [["internal_wall_insulation"], "11", None, [0]], - [["external_wall_insulation"], "10", None, [0]], - [["solar", "windows"], "12-15", {"photo_supply_ending": 50}, [0, 1]], + [ + ["internal_wall_insulation"], + "11", + {"walls_insulation_thickness_ending": "average"}, + [0], + ], + [ + ["external_wall_insulation"], + "10", + {"walls_insulation_thickness_ending": "average"}, + [0], + ], + [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]], ], }, { @@ -64,7 +74,12 @@ scenario_properties = [ "postcode": "HP1 2HA", "lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, { @@ -72,7 +87,12 @@ scenario_properties = [ "postcode": "HP1 2HE", "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, { @@ -80,7 +100,12 @@ scenario_properties = [ "postcode": "HP1 2AN", "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, { @@ -88,11 +113,17 @@ scenario_properties = [ "postcode": "HP1 2HX", "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, ] + recommendations_scoring_data = [] for scenario_property in scenario_properties: @@ -217,6 +248,9 @@ for scenario_property in scenario_properties: recommendations_scoring_data.extend(scoring_list) recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) +recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype( + int +) recommendations_scoring_data = recommendations_scoring_data.drop( columns=[ "rdsap_change", From dbeba4db43645ee999eb49f40c0359457ae0f703 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 Mar 2024 18:12:57 +0000 Subject: [PATCH 157/248] set up first basic asset list for gla demo --- etl/customers/gla_croydon_demo/asset_list.py | 145 ++++++++++++++++++ .../ha_15_32/ha_analysis_batch_3.py | 109 ++++++++++--- 2 files changed, 232 insertions(+), 22 deletions(-) create mode 100644 etl/customers/gla_croydon_demo/asset_list.py diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py new file mode 100644 index 00000000..526c34a0 --- /dev/null +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -0,0 +1,145 @@ +import pandas as pd +from utils.s3 import save_csv_to_s3 + +USER_ID = 8 +PORTFOLIO_ID = 67 + + +def app(): + """ + We shall define a small portfolio of properties, based in Croydon + :return: + """ + + # Firstly, read in the EPC data for Croydon + epc_data = pd.read_csv( + "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv", + low_memory=False + ) + + # Filter on entries where we have a UPRN + epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] + + # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this + epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"]) + + epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN") + + # Now filter on social properties + epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])] + # There are 17337 properties with a registered EPC in Croydon + # Take below EPC C properties + epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69] + # 7994 properties are below EPC C (46%) + + # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties + epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True) + + # For the purpose of the sample, take the properties have surveys done in the last 2 years + # This gives us 1023 remaining properties + two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365)) + epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago] + + # Archetype 1: defined below: + # 1) House + # 2) Unfilled cavity + # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation) + # 4) EPC E + # Different buckets of properties + archetype_1_sample = epc_data[ + epc_data["PROPERTY_TYPE"].isin(["House"]) & + (epc_data["CURRENT_ENERGY_RATING"] == "E") & + epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) & + epc_data["ROOF_DESCRIPTION"].isin( + [ + "Pitched, 12 mm loft insulation", + "Pitched, 0 mm loft insulation", + "Pitched, no insulation", + "Pitched, 50 mm loft insulation", + "Flat, no insulation (assumed)", + "Pitched, no insulation (assumed)" + ] + ) + ] + archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() + archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1" + + # Archetype 2: defined below: + # 1) Flat + # 2) Unfilled cavity + # 3) Another property above + # 4) EPC E + archetype_2_sample = epc_data[ + epc_data["PROPERTY_TYPE"].isin(["Flat"]) & + (epc_data["CURRENT_ENERGY_RATING"] == "E") & + epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) & + epc_data["ROOF_DESCRIPTION"].isin( + [ + "(another dwelling above)" + ] + ) + ] + archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() + archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2" + + # Archetype 3: defined below: + # 1) EPC F + # 2) Solid brick wall + # 3) House + # 4) Pitched roof with no insulation + # Just 1 property (more expensive to retrofit) + archetype_3_sample = epc_data[ + epc_data["PROPERTY_TYPE"].isin(["House"]) & + (epc_data["CURRENT_ENERGY_RATING"] == "F") & + epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"]) + ] + archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() + archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3" + + # Archetype 4: defined below: + # 1) Maisonette + # 2) Empty cavity + # 3) EPC E + # 14 properties here + archetype_4_sample = epc_data[ + epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) & + epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) + ] + archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() + archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4" + + asset_list = pd.concat( + [ + archetype_1_sample_asset_list, + archetype_2_sample_asset_list, + archetype_3_sample_asset_list, + archetype_4_sample_asset_list + ] + ) + + asset_list = asset_list.rename( + columns={ + "UPRN": "uprn", + "ADDRESS1": "address", + "POSTCODE": "postcode", + "ARCHETYPE": "archetype" + } + ) + + filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Social", + "goal": "Increase EPC", + "goal_value": "C", + "trigger_file_path": filename, + "budget": None, + "exclusions": ["floor_insulation"] + } + print(body) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e414cd00..b4b82d0b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -6692,6 +6692,92 @@ def create_final_report(): revenue.to_csv("HA Analysis Final - revenue.csv") +def identify_eco_works(loader): + # ha_names = [ + # "HA16", # For Housing + # "HA39", # Rooftop + # "HA41", # Settle + # "HA23", # Lambeth + # "HA14", # EMH + # "HA7", # Believe + # "HA102", # Thrive + # ] + + # Unitas, fairhive, acis, LHP + ha_names = [ + "HA50", # Unitas + "HA15", # Fairhive + "HA107", # ACIS + "HA24", # LHP + ] + names = { + "HA50": "Unitas", + "HA15": "Fairhive", + "HA107": "ACIS", + "HA24": "LHP" + } + + # gbis rate + breakdowns = [] + # lists = {} + for ha, data_assets in loader.data.items(): + if ha not in ha_names: + continue + + asset_list = data_assets["asset_list"].copy() + survey_list = data_assets["survey_list"].copy() + # Remove things that have sold + if not survey_list.empty: + asset_list = asset_list.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + # Anything that has an installation has gone to installation, and therefore is not remaining + asset_list = asset_list[pd.isnull(asset_list["installation_status"])] + asset_list = asset_list.drop(columns=["installation_status"]) + + # Needing a CIGA check + needs_cga = asset_list[ + asset_list["ECO Eligibility"] == "eco4 (subject to ciga)" + ].copy() + + eco4 = asset_list[ + asset_list["ECO Eligibility"] == "eco4" + ].copy() + + eco4_passed_ciga = asset_list[ + asset_list["ECO Eligibility"] == "eco4 - passed ciga" + ].copy() + + # lists[ha] = { + # "needs_cga": needs_cga, + # "eco4": eco4, + # "eco4_passed_ciga": eco4_passed_ciga + # } + + # Store the data + if not needs_cga.empty: + needs_cga.to_csv(f"local_data/{names[ha]} - needs ciga.csv") + + if not eco4.empty: + eco4.to_csv(f"local_data/{names[ha]} - eco4.csv") + + if not eco4_passed_ciga.empty: + eco4_passed_ciga.to_csv(f"local_data/{names[ha]} - eco4 passed ciga.csv") + + summary = { + "HA Name": ha, + "n_needing_ciga": needs_cga.shape[0], + "eco4": eco4.shape[0], + "eco4_passed_ciga": eco4_passed_ciga.shape[0] + } + + breakdowns.append(summary) + breakdowns = pd.DataFrame(breakdowns) + breakdowns = breakdowns.fillna(0) + + def app(): """ This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. @@ -6739,29 +6825,8 @@ def app(): loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs) loader.load() loader.ha_facts_and_figures() - forecast_remaining_sales(loader) - # gbis rate - # breakdowns = [] - # for ha, data_assets in loader.data.items(): - # asset_list = data_assets["asset_list"].copy() - # breakdown = asset_list["ECO Eligibility"].value_counts().to_dict() - # breakdowns.append(breakdown) - # breakdowns = pd.DataFrame(breakdowns) - # - # installer = [] - # for ha, data_assets in loader.data.items(): - # survey_list = data_assets["survey_list"] - # if survey_list.empty: - # continue - # if "INSTALLER" not in survey_list.columns: - # continue - # - # installers = survey_list["INSTALLER"].value_counts().to_dict() - # installers["ha_name"] = ha - # installer.append(installers) - # installer = pd.DataFrame(installer) - # installer.drop(columns=["ha_name"]).sum().sum() + forecast_remaining_sales(loader) # Adhoc - for HA16, get the properties that still need a CIGA check asset_list_ha16 = loader.data["HA16"]["asset_list"].copy() From d34a4d4d963d349877d63a44753549186247a64d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 14:32:29 +0000 Subject: [PATCH 158/248] allowing passage of uprn to Searcher in api --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/app/plan/router.py | 4 ++++ backend/app/plan/schemas.py | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 0b98cf2c..5456cdb6 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -91,10 +91,14 @@ async def trigger_plan(body: PlanTriggerRequest): input_properties = [] for config in tqdm(plan_input): # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly + uprn = config.get("uprn", None) + if uprn: + uprn = int(float(uprn)) epc_searcher = SearchEpc( address1=config["address"], postcode=config["postcode"], + uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 9801375f..1e95fb2f 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -8,3 +8,5 @@ class PlanTriggerRequest(BaseModel): goal_value: str portfolio_id: int trigger_file_path: str + # optional exclusions list + exclusions: list[str] | None = None From 91eb9c68f1600970541606fdae3869d19ee724cb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 14:49:19 +0000 Subject: [PATCH 159/248] Adding validation to PlanTriggerRequest --- backend/app/plan/schemas.py | 47 +++++++++++++-- recommendations/Recommendations.py | 94 +++++++++++++++++------------- 2 files changed, 95 insertions(+), 46 deletions(-) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 1e95fb2f..c13e754e 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -1,12 +1,51 @@ -from pydantic import BaseModel +from pydantic import BaseModel, conlist, validator +from typing import Optional class PlanTriggerRequest(BaseModel): - budget: float | None = None + budget: Optional[float] = None goal: str housing_type: str goal_value: str portfolio_id: int trigger_file_path: str - # optional exclusions list - exclusions: list[str] | None = None + exclusions: Optional[conlist(str, min_items=1)] = None + + # Pre-defined list of possibilities for exclusions + _allowed_exclusions = { + "wall_insulation", + "ventilation", + "roof_insulation", + "floor_insulation", + "windows", + "fireplace", + "heating", + "hot_water", + "lighting", + "solar_pv" + } + + _allowed_goals = {"Increase EPC"} + + _allowed_housing_types = {"Social", "Private"} + + # Validator to ensure exclusions are within the pre-defined possibilities + @validator('exclusions', each_item=True) + def check_exclusions(self, v): + if v not in self._allowed_exclusions: + raise ValueError(f"{v} is not an allowed exclusion") + return v + + # Validator to ensure that the goal is within the pre-defined possibilities + @validator('goal') + def check_goal(self, v): + if v not in self._allowed_goals: + raise ValueError(f"{v} is not a valid goal") + return v + + # Validator to ensure that the housing type is within the pre-defined possibilities + @validator('housing_type') + def check_housing_type(self, v): + if v not in self.allowed_housing_types: + raise ValueError(f"{v} is not a valid housing type") + return v diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 9f838e1c..d3436ef0 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -22,7 +22,8 @@ class Recommendations: def __init__( self, property_instance: Property, - materials: List + materials: List, + exclusions: List[str] = None, ): """ :param property_instance: Instance of the Property class, for the home associated to property_id @@ -31,6 +32,7 @@ class Recommendations: self.property_instance = property_instance self.materials = materials + self.exclusions = exclusions if exclusions else [] self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials) self.wall_recomender = WallRecommendations(property_instance=property_instance, materials=materials) @@ -58,67 +60,75 @@ class Recommendations: property_recommendations = [] phase = 0 - print("WALL RECOMMENDATIONS HAVE BEEN COMMENTED OUT TEMPORARILY - ADD ME BACK IN") - if portfolio_id != 66: - # Building Fabric + # Building Fabric + if "wall_insulation" not in self.exclusions: self.wall_recomender.recommend(phase=phase) if self.wall_recomender.recommendations: property_recommendations.append(self.wall_recomender.recommendations) phase += 1 - # Ventilation recommendations - # We only produce a ventilation recommendation if the property is recommended to have wall or roof - # insulation - # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no - # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any - # wall or roof recommendations, we will ensure that ventilation is included in the simulation + # Ventilation recommendations + # We only produce a ventilation recommendation if the property is recommended to have wall or roof + # insulation + # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no + # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any + # wall or roof recommendations, we will ensure that ventilation is included in the simulation + if "ventilation" not in self.exclusions: if self.wall_recomender.recommendations or self.roof_recommender.recommendations: self.ventilation_recomender.recommend() if self.ventilation_recomender.recommendation: property_recommendations.append(self.ventilation_recomender.recommendation) - self.roof_recommender.recommend(phase=phase) - if self.roof_recommender.recommendations: - property_recommendations.append(self.roof_recommender.recommendations) - phase += 1 + if "roof_insulation" not in self.exclusions: + self.roof_recommender.recommend(phase=phase) + if self.roof_recommender.recommendations: + property_recommendations.append(self.roof_recommender.recommendations) + phase += 1 - self.floor_recommender.recommend(phase=phase) - if self.floor_recommender.recommendations: - property_recommendations.append(self.floor_recommender.recommendations) - phase += 1 + if "floor_insulation" not in self.exclusions: + self.floor_recommender.recommend(phase=phase) + if self.floor_recommender.recommendations: + property_recommendations.append(self.floor_recommender.recommendations) + phase += 1 - self.windows_recommender.recommend(phase=phase) - if self.windows_recommender.recommendation: - property_recommendations.append(self.windows_recommender.recommendation) - phase += 1 + if "windows" not in self.exclusions: + self.windows_recommender.recommend(phase=phase) + if self.windows_recommender.recommendation: + property_recommendations.append(self.windows_recommender.recommendation) + phase += 1 - self.fireplace_recommender.recommend(phase=phase) - if self.fireplace_recommender.recommendation: - property_recommendations.append(self.fireplace_recommender.recommendation) - phase += 1 + if "fireplace" not in self.exclusions: + self.fireplace_recommender.recommend(phase=phase) + if self.fireplace_recommender.recommendation: + property_recommendations.append(self.fireplace_recommender.recommendation) + phase += 1 # Heating and Electical systems - self.heating_recommender.recommend(phase=phase) - if self.heating_recommender.recommendations: - property_recommendations.append(self.heating_recommender.recommendations) - phase += 1 + if "heating" not in self.exclusions: + self.heating_recommender.recommend(phase=phase) + if self.heating_recommender.recommendations: + property_recommendations.append(self.heating_recommender.recommendations) + phase += 1 # Hot water - self.hotwater_recommender.recommend(phase=phase) - if self.hotwater_recommender.recommendations: - property_recommendations.append(self.hotwater_recommender.recommendations) - phase += 1 + if "hot_water" not in self.exclusions: + self.hotwater_recommender.recommend(phase=phase) + if self.hotwater_recommender.recommendations: + property_recommendations.append(self.hotwater_recommender.recommendations) + phase += 1 - self.lighting_recommender.recommend(phase=phase) - if self.lighting_recommender.recommendation: - property_recommendations.append(self.lighting_recommender.recommendation) - phase += 1 + if "lighting" not in self.exclusions: + self.lighting_recommender.recommend(phase=phase) + if self.lighting_recommender.recommendation: + property_recommendations.append(self.lighting_recommender.recommendation) + phase += 1 # Renewables - self.solar_recommender.recommend(phase=phase) - if self.solar_recommender.recommendation: - property_recommendations.append(self.solar_recommender.recommendation) - phase += 1 + if "solar_pv" not in self.exclusions: + self.solar_recommender.recommend(phase=phase) + if self.solar_recommender.recommendation: + property_recommendations.append(self.solar_recommender.recommendation) + phase += 1 # We insert temporary ids into the recommendations which is important for the optimiser later property_recommendations = self.insert_temp_recommendation_id(property_recommendations) From 22a3e21f523b79da4ec65fa12d8d901242c5cfb6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 14:52:24 +0000 Subject: [PATCH 160/248] update validation of PlanTriggerRequest to use cls rather than self --- backend/app/plan/router.py | 4 +--- backend/app/plan/schemas.py | 12 ++++++------ recommendations/Recommendations.py | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 5456cdb6..e25c04a5 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -170,9 +170,7 @@ async def trigger_plan(body: PlanTriggerRequest): p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) recommender = Recommendations(property_instance=p, materials=materials) - # TODO: portfolio id as an input is temp - print("DELETE PORTFOLIO ID AS AN INPUT!!") - property_recommendations, property_representative_recommendations = recommender.recommend(body.portfolio_id) + property_recommendations, property_representative_recommendations = recommender.recommend() if not property_recommendations: continue diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index c13e754e..b8a99704 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -31,21 +31,21 @@ class PlanTriggerRequest(BaseModel): # Validator to ensure exclusions are within the pre-defined possibilities @validator('exclusions', each_item=True) - def check_exclusions(self, v): - if v not in self._allowed_exclusions: + def check_exclusions(cls, v): + if v not in cls._allowed_exclusions: raise ValueError(f"{v} is not an allowed exclusion") return v # Validator to ensure that the goal is within the pre-defined possibilities @validator('goal') - def check_goal(self, v): - if v not in self._allowed_goals: + def check_goal(cls, v): + if v not in cls._allowed_goals: raise ValueError(f"{v} is not a valid goal") return v # Validator to ensure that the housing type is within the pre-defined possibilities @validator('housing_type') - def check_housing_type(self, v): - if v not in self.allowed_housing_types: + def check_housing_type(cls, v): + if v not in cls._allowed_housing_types: raise ValueError(f"{v} is not a valid housing type") return v diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index d3436ef0..b2e6d991 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -47,7 +47,7 @@ class Recommendations: self.heating_recommender = HeatingRecommender(property_instance=property_instance) self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance) - def recommend(self, portfolio_id): + def recommend(self): """ This method runs the recommendations for the individual measures and then appends them to a list for output From 8dbd69eef9140efdb3feab6933f195c762a2ba8c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 15:54:31 +0000 Subject: [PATCH 161/248] Updating router for chunked scoring --- backend/Property.py | 2 +- backend/app/plan/router.py | 36 ++++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f86e33dc..d97ce8cf 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -233,7 +233,7 @@ class Property: output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"] # Setting the insulation thickness here to above average should be tested further because we # don't see a high volume of instances for this - output["walls_insulation_thickness_ending"] = "above average" + output["walls_insulation_thickness_ending"] = "average" output["walls_energy_eff_ending"] = "Good" # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e25c04a5..bcbc4332 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -41,6 +41,7 @@ from backend.ml_models.Valuation import PropertyValuation logger = setup_logger() BATCH_SIZE = 5 +SCORING_BATCH_SIZE = 400 def patch_epc(config, epc_records): @@ -164,7 +165,7 @@ async def trigger_plan(body: PlanTriggerRequest): recommendations = {} recommendations_scoring_data = [] representative_recommendations = {} - for p in input_properties: + for p in tqdm(input_properties): # Property recommendations p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) @@ -196,15 +197,30 @@ async def trigger_plan(body: PlanTriggerRequest): model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - all_predictions = model_api.predict_all( - df=recommendations_scoring_data, - bucket=get_settings().DATA_BUCKET, - prediction_buckets={ - "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, - "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, - "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET - } - ) + all_predictions = { + "sap_change_predictions": pd.DataFrame(), + "heat_demand_predictions": pd.DataFrame(), + "carbon_change_predictions": pd.DataFrame() + } + to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE) + for chunk in tqdm(to_loop_over, total=len(to_loop_over)): + predictions_dict = model_api.predict_all( + df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE], + bucket=get_settings().DATA_BUCKET, + prediction_buckets={ + "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, + "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, + "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET + } + ) + + # Append the predictions to the predictions dictionary + for key, scored in predictions_dict.items(): + all_predictions[key] = pd.concat([all_predictions[key], scored]) + + # TODO: TEMP + # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy() + # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy() # Insert the predictions into the recommendations and run the optimiser # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a From bd15ce65c2b05cdffe7304121d1fd8282fea55cb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 16:29:23 +0000 Subject: [PATCH 162/248] debugging optimisation with ventilation, when ventilation already exists --- backend/app/plan/router.py | 16 +++++++++------- recommendations/Recommendations.py | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index bcbc4332..a0d93190 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -170,7 +170,7 @@ async def trigger_plan(body: PlanTriggerRequest): # Property recommendations p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) - recommender = Recommendations(property_instance=p, materials=materials) + recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions) property_recommendations, property_representative_recommendations = recommender.recommend() if not property_recommendations: @@ -196,6 +196,7 @@ async def trigger_plan(body: PlanTriggerRequest): ) model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) + # model_api.MODEL_PREFIXES = ["sap_change_predictions"] all_predictions = { "sap_change_predictions": pd.DataFrame(), @@ -274,14 +275,15 @@ async def trigger_plan(body: PlanTriggerRequest): if any(x in [r["type"] for r in solution] for x in [ "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation" ]): - ventilation_rec = [ - r for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation" - ][0] - - selected_recommendations = set( - list(selected_recommendations) + [ventilation_rec[0]["recommendation_id"]] + ventilation_rec = next( + (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"), + None ) + # If a matching recommendation was found, add its ID to the selected recommendations + if ventilation_rec: + selected_recommendations.add(ventilation_rec["recommendation_id"]) + # We check if the selected recommendation is wall ventilation and if so, we make sure # mechanical ventilation is selected diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index b2e6d991..944fec7a 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -67,11 +67,19 @@ class Recommendations: property_recommendations.append(self.wall_recomender.recommendations) phase += 1 + if "roof_insulation" not in self.exclusions: + self.roof_recommender.recommend(phase=phase) + if self.roof_recommender.recommendations: + property_recommendations.append(self.roof_recommender.recommendations) + phase += 1 + # Ventilation recommendations # We only produce a ventilation recommendation if the property is recommended to have wall or roof # insulation - # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no - # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any + # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this + # has no + # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we + # have any # wall or roof recommendations, we will ensure that ventilation is included in the simulation if "ventilation" not in self.exclusions: if self.wall_recomender.recommendations or self.roof_recommender.recommendations: @@ -79,12 +87,6 @@ class Recommendations: if self.ventilation_recomender.recommendation: property_recommendations.append(self.ventilation_recomender.recommendation) - if "roof_insulation" not in self.exclusions: - self.roof_recommender.recommend(phase=phase) - if self.roof_recommender.recommendations: - property_recommendations.append(self.roof_recommender.recommendations) - phase += 1 - if "floor_insulation" not in self.exclusions: self.floor_recommender.recommend(phase=phase) if self.floor_recommender.recommendations: From 72a4feb6af3967dc6ce00bb4df7d7d47c4772dc1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 17:18:08 +0000 Subject: [PATCH 163/248] minor tweak to asset list to make uprn int --- etl/customers/gla_croydon_demo/asset_list.py | 8 ++++++-- etl/customers/gla_croydon_demo/slides.py | 0 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 etl/customers/gla_croydon_demo/slides.py diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 526c34a0..01220d0a 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -36,7 +36,7 @@ def app(): epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True) # For the purpose of the sample, take the properties have surveys done in the last 2 years - # This gives us 1023 remaining properties + # This gives us 1167 remaining properties two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365)) epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago] @@ -45,7 +45,7 @@ def app(): # 2) Unfilled cavity # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation) # 4) EPC E - # Different buckets of properties + # 12 properties archetype_1_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["House"]) & (epc_data["CURRENT_ENERGY_RATING"] == "E") & @@ -69,6 +69,7 @@ def app(): # 2) Unfilled cavity # 3) Another property above # 4) EPC E + # 14 properties here archetype_2_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["Flat"]) & (epc_data["CURRENT_ENERGY_RATING"] == "E") & @@ -108,6 +109,7 @@ def app(): archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4" + # 41 total properties asset_list = pd.concat( [ archetype_1_sample_asset_list, @@ -126,6 +128,8 @@ def app(): } ) + asset_list["uprn"] = asset_list["uprn"].astype(int) + filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv" save_csv_to_s3( dataframe=asset_list, diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py new file mode 100644 index 00000000..e69de29b From 80fc7c821e0923918252edde9b90ab32a18cc765 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 17:38:52 +0000 Subject: [PATCH 164/248] moed reading csv function --- backend/app/plan/router.py | 7 ++-- backend/app/utils.py | 21 ----------- etl/customers/gla_croydon_demo/slides.py | 44 ++++++++++++++++++++++++ utils/s3.py | 24 +++++++++++-- 4 files changed, 69 insertions(+), 27 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index a0d93190..2067d796 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -24,7 +24,7 @@ from backend.app.db.models.portfolio import rating_lookup from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest from backend.app.plan.utils import get_cleaned -from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc +from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc from backend.ml_models.api import ModelApi from backend.Property import Property @@ -35,7 +35,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.optimiser_functions import prepare_input_measures from recommendations.Recommendations import Recommendations from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3 from backend.ml_models.Valuation import PropertyValuation logger = setup_logger() @@ -196,7 +196,7 @@ async def trigger_plan(body: PlanTriggerRequest): ) model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - # model_api.MODEL_PREFIXES = ["sap_change_predictions"] + # model_api.MODEL_PREFIXES = ['sap_change_predictions', 'carbon_change_predictions'] all_predictions = { "sap_change_predictions": pd.DataFrame(), @@ -221,7 +221,6 @@ async def trigger_plan(body: PlanTriggerRequest): # TODO: TEMP # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy() - # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy() # Insert the predictions into the recommendations and run the optimiser # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a diff --git a/backend/app/utils.py b/backend/app/utils.py index ba5509e1..b3843206 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -1,6 +1,4 @@ import boto3 -import csv -from io import StringIO import string import secrets import logging @@ -41,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False): return logger -def read_csv_from_s3(bucket_name, filepath): - s3 = boto3.client('s3') - - # Get the object from s3 - s3_object = s3.get_object(Bucket=bucket_name, Key=filepath) - - # Read the CSV body from the s3 object - body = s3_object['Body'].read() - - # Use StringIO to create a file-like object from the string - csv_data = StringIO(body.decode('utf-8')) - - # Use csv library to read it into a list of dictionaries - reader = csv.DictReader(csv_data) - data = list(reader) - - return data - - def generate_api_key(): # Define the characters that will be used to generate the api key characters = string.ascii_letters + string.digits diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py index e69de29b..5954f604 100644 --- a/etl/customers/gla_croydon_demo/slides.py +++ b/etl/customers/gla_croydon_demo/slides.py @@ -0,0 +1,44 @@ +""" +This script contains the code to generate the data required to populate the slides +We connect to the database amd extract the data for the portfolio needed so it is recommended to use +a environment akin to the backend to run this script +""" +import pandas as pd +import numpy as np +from backend.app.db.connection import db_engine +from sqlalchemy.orm import sessionmaker +from utils.s3 import read_csv_from_s3 +from etl.customers.slide_utils import ( + plot_epc_distribution, + get_property_details_by_portfolio_id, + get_plan_by_portfolio_id, + get_properties_with_default_recommendations, + create_powerpoint, + create_recommendations_summary +) + +USER_ID = 8 +PORTFOLIO_ID_1 = 67 +EPC_TARGET_1 = "C" +SAP_TARGET_1 = 69 +CUSTOMER_KEY = "gla-demo" + + +def app(): + # Connect to database + session = sessionmaker(bind=db_engine)() + + ######################################################################## + # Get the data we need + ######################################################################## + + portfolio_id = PORTFOLIO_ID_1 + + # Get the asset list + asset_list = read_csv_from_s3( + "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv" + ) + + # Get the properties for the portfolio + properties = get_properties_with_default_recommendations(session, portfolio_id) + properties_df = pd.DataFrame(properties) diff --git a/utils/s3.py b/utils/s3.py index 8d36bdb3..fd5992ce 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -1,9 +1,10 @@ import pickle import boto3 -from io import BytesIO, StringIO -from botocore.exceptions import NoCredentialsError, PartialCredentialsError +import csv import pandas as pd +from io import BytesIO, StringIO from utils.logger import setup_logger +from botocore.exceptions import NoCredentialsError, PartialCredentialsError logger = setup_logger() @@ -224,3 +225,22 @@ def read_excel_from_s3(bucket_name, file_key, header_row): df.reset_index(drop=True, inplace=True) return df + + +def read_csv_from_s3(bucket_name, filepath): + s3 = boto3.client('s3') + + # Get the object from s3 + s3_object = s3.get_object(Bucket=bucket_name, Key=filepath) + + # Read the CSV body from the s3 object + body = s3_object['Body'].read() + + # Use StringIO to create a file-like object from the string + csv_data = StringIO(body.decode('utf-8')) + + # Use csv library to read it into a list of dictionaries + reader = csv.DictReader(csv_data) + data = list(reader) + + return data From 053218b3fd9ef7bec918baed43473f3d3485fa4e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 2 Apr 2024 11:18:58 +0100 Subject: [PATCH 165/248] updated price cap figures --- backend/app/plan/router.py | 4 -- backend/ml_models/AnnualBillSavings.py | 10 ++--- etl/customers/gla_croydon_demo/asset_list.py | 40 +++++++++++------- etl/customers/gla_croydon_demo/slides.py | 43 ++++++++++++++++++++ 4 files changed, 73 insertions(+), 24 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 2067d796..50b8a837 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -196,7 +196,6 @@ async def trigger_plan(body: PlanTriggerRequest): ) model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - # model_api.MODEL_PREFIXES = ['sap_change_predictions', 'carbon_change_predictions'] all_predictions = { "sap_change_predictions": pd.DataFrame(), @@ -219,9 +218,6 @@ async def trigger_plan(body: PlanTriggerRequest): for key, scored in predictions_dict.items(): all_predictions[key] = pd.concat([all_predictions[key], scored]) - # TODO: TEMP - # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy() - # Insert the predictions into the recommendations and run the optimiser # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a # possibility with heating system diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 99fae4db..4a433a7f 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -10,13 +10,13 @@ class AnnualBillSavings: AVERAGE_ELECTRICITY_CONSUMPTION = 2700 AVERAGE_GAS_CONSUMPTION = 11500 - # Latest price cap figures from Ofgem are for January 2024 - # https://www.ofgem.gov.uk/publications/changes-energy-price-cap-1-january-2024 - ELECTRICITY_PRICE_CAP = 0.29 - GAS_PRICE_CAP = 0.07 + # Latest price cap figures from Ofgem are for April 2024 + # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today + ELECTRICITY_PRICE_CAP = 0.245 + GAS_PRICE_CAP = 0.0604 # This is a weighted mean of the price caps, using the consumption figures above as weights - PRICE_FACTOR = 0.11183098591549295 + PRICE_FACTOR = 0.09549999999999999 EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"] diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 01220d0a..a0475807 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -35,20 +35,20 @@ def app(): # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True) - # For the purpose of the sample, take the properties have surveys done in the last 2 years - # This gives us 1167 remaining properties - two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365)) - epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago] + # For the purpose of the sample, take the properties have surveys done in the last 3 years + # This gives us 1351 remaining properties + three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365)) + epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago] # Archetype 1: defined below: # 1) House # 2) Unfilled cavity # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation) - # 4) EPC E - # 12 properties + # 4) EPC E or D + # 24 properties archetype_1_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["House"]) & - (epc_data["CURRENT_ENERGY_RATING"] == "E") & + (epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E"])) & epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) & epc_data["ROOF_DESCRIPTION"].isin( [ @@ -69,10 +69,10 @@ def app(): # 2) Unfilled cavity # 3) Another property above # 4) EPC E - # 14 properties here + # 57 properties here archetype_2_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["Flat"]) & - (epc_data["CURRENT_ENERGY_RATING"] == "E") & + (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "D"])) & epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) & epc_data["ROOF_DESCRIPTION"].isin( [ @@ -88,11 +88,18 @@ def app(): # 2) Solid brick wall # 3) House # 4) Pitched roof with no insulation - # Just 1 property (more expensive to retrofit) + # Just 7 properties (more expensive to retrofit) archetype_3_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["House"]) & - (epc_data["CURRENT_ENERGY_RATING"] == "F") & - epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"]) + (epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])) & + epc_data["ROOF_DESCRIPTION"].isin( + [ + "Pitched, no insulation", + "Pitched, limited insulation (assumed)", + "Pitched, 100 mm loft insulation", + "Pitched, no insulation (assumed)", + ] + ) ] archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3" @@ -101,15 +108,18 @@ def app(): # 1) Maisonette # 2) Empty cavity # 3) EPC E - # 14 properties here + # 16 properties here archetype_4_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) & - epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) + epc_data["WALLS_DESCRIPTION"].isin( + ["Cavity wall, as built, no insulation (assumed)"] + ) ] + archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4" - # 41 total properties + # 104 total properties asset_list = pd.concat( [ archetype_1_sample_asset_list, diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py index 5954f604..ebca7dc3 100644 --- a/etl/customers/gla_croydon_demo/slides.py +++ b/etl/customers/gla_croydon_demo/slides.py @@ -38,7 +38,50 @@ def app(): asset_list = read_csv_from_s3( "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv" ) + asset_list = pd.DataFrame(asset_list) # Get the properties for the portfolio properties = get_properties_with_default_recommendations(session, portfolio_id) properties_df = pd.DataFrame(properties) + + # We now pull the data for the property details + property_details = get_property_details_by_portfolio_id(session, portfolio_id) + property_details_df = pd.DataFrame(property_details) + # Merge on uprn + property_details_df = property_details_df.merge( + properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}), + on="property_id" + ) + + plans = get_plan_by_portfolio_id(session, portfolio_id) + plans_df = pd.DataFrame(plans) + + # Unnest the recommendations. Each recommendation is a list of dictionaries + recommendations_exploded = properties_df["recommendations"].explode().tolist() + recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)]) + # Add uprn on + recommendations_df = recommendations_df.merge( + properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}), + how="left", + on="property_id" + ) + + # Summary information by each archetype + archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"] + + recommendations_arch_1_summary = create_recommendations_summary( + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)], + properties_df[properties_df["uprn"].astype(str).isin(archetype_1["uprn"].values)], + SAP_TARGET_1 + ) + + # Take the mean, median and maximum of each value + arch_1_recommendation_means = recommendations_arch_1_summary.mean() + + arch_1_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values) + ] + + arch_1_property_details_means = arch_1_property_details.mean() + + arch_1_recommendation_means["total_bill_savings"] / arch_1_property_details_means["adjusted_energy_consumption"] From 08a657eb9f505a10608377eff1c0c10b76bd2f0a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 12:18:08 +0100 Subject: [PATCH 166/248] Adding costs for ttzc --- backend/ml_models/AnnualBillSavings.py | 13 +++ etl/customers/gla_croydon_demo/asset_list.py | 13 +++ etl/customers/gla_croydon_demo/slides.py | 100 ++++++++++++++--- etl/customers/slide_utils.py | 22 +++- recommendations/Costs.py | 83 +++++++++++++- recommendations/HeatingControlRecommender.py | 108 +++++++++++++++++++ recommendations/HeatingRecommender.py | 17 +++ 7 files changed, 338 insertions(+), 18 deletions(-) diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 4a433a7f..9be9d78a 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -18,6 +18,9 @@ class AnnualBillSavings: # This is a weighted mean of the price caps, using the consumption figures above as weights PRICE_FACTOR = 0.09549999999999999 + # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT + DAILY_STANDARD_CHARGE = 0.3143 + EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"] @classmethod @@ -38,6 +41,16 @@ class AnnualBillSavings: """ return cls.ELECTRICITY_PRICE_CAP * kwh + @classmethod + def calculate_annual_bill(cls, kwh): + """ + This method will estimate the total annual bill for a property + :param kwh: The total kwh consumption + :return: An estimate for annual bill + """ + + return cls.PRICE_FACTOR * kwh + cls.DAILY_STANDARD_CHARGE * 365 + @classmethod def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating): """ diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index a0475807..3a3f02a3 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -140,6 +140,19 @@ def app(): asset_list["uprn"] = asset_list["uprn"].astype(int) + # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we + # manually remove + # 1) 3 Reid Close, CR5 3BL + # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD + asset_list = asset_list[ + ~asset_list["uprn"].isin( + [ + 100020576460, + 100020624352, + ] + ) + ] + filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv" save_csv_to_s3( dataframe=asset_list, diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py index ebca7dc3..1d217226 100644 --- a/etl/customers/gla_croydon_demo/slides.py +++ b/etl/customers/gla_croydon_demo/slides.py @@ -16,11 +16,15 @@ from etl.customers.slide_utils import ( create_powerpoint, create_recommendations_summary ) +from backend.ml_models.AnnualBillSavings import AnnualBillSavings USER_ID = 8 PORTFOLIO_ID_1 = 67 +PORTFOLIO_ID_2 = 68 EPC_TARGET_1 = "C" +EPC_TARGET_2 = "A" SAP_TARGET_1 = 69 +SAP_TARGET_2 = 100 CUSTOMER_KEY = "gla-demo" @@ -32,11 +36,13 @@ def app(): # Get the data we need ######################################################################## - portfolio_id = PORTFOLIO_ID_1 + # TODO: Update to portfolio desired + # portfolio_id = PORTFOLIO_ID_1 + portfolio_id = PORTFOLIO_ID_2 # Get the asset list asset_list = read_csv_from_s3( - "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv" + "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv" ) asset_list = pd.DataFrame(asset_list) @@ -47,6 +53,10 @@ def app(): # We now pull the data for the property details property_details = get_property_details_by_portfolio_id(session, portfolio_id) property_details_df = pd.DataFrame(property_details) + # We estimate bills based on the adjusted_energy_consumption + property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply( + lambda x: AnnualBillSavings.calculate_annual_bill(x) + ) # Merge on uprn property_details_df = property_details_df.merge( properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}), @@ -66,22 +76,84 @@ def app(): on="property_id" ) - # Summary information by each archetype - archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"] - - recommendations_arch_1_summary = create_recommendations_summary( - recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)], - properties_df[properties_df["uprn"].astype(str).isin(archetype_1["uprn"].values)], + recommendations_summary = create_recommendations_summary( + recommendations_df, + properties_df, + property_details_df, SAP_TARGET_1 ) - # Take the mean, median and maximum of each value - arch_1_recommendation_means = recommendations_arch_1_summary.mean() + # Calculate % changes of energ, co2 and abs + recommendations_summary["carbon_percent_change"] = ( + recommendations_summary["total_carbon"] / recommendations_summary["current_co2"] + ) - arch_1_property_details = property_details_df[ - property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values) + recommendations_summary["energy_percent_change"] = ( + recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"] + ) + + recommendations_summary["bills_percent_change"] = ( + recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"] + ) + + # Summary information by each archetype + ######################## + # Archetype 1 + ######################## + archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"] + recommendations_arch_1_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values) ] - arch_1_property_details_means = arch_1_property_details.mean() + # Take the mean, median and maximum of each value + arch_1_recommendation_min = recommendations_arch_1_summary.min() + arch_1_recommendation_max = recommendations_arch_1_summary.max() + arch_1_recommendation_means = recommendations_arch_1_summary.mean() - arch_1_recommendation_means["total_bill_savings"] / arch_1_property_details_means["adjusted_energy_consumption"] + ######################## + # Archetype 2 + ######################## + archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"] + recommendations_arch_2_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_2_recommendation_min = recommendations_arch_2_summary.min() + arch_2_recommendation_max = recommendations_arch_2_summary.max() + arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2) + + ######################## + # Archetype 3 + ######################## + archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"] + recommendations_arch_3_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_3_recommendation_min = recommendations_arch_3_summary.min() + arch_3_recommendation_max = recommendations_arch_3_summary.max() + arch_3_recommendation_means = recommendations_arch_3_summary.mean() + + ######################## + # Archetype 4 + ######################## + archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"] + recommendations_arch_4_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_4_recommendation_min = recommendations_arch_4_summary.min() + arch_4_recommendation_max = recommendations_arch_4_summary.max() + arch_4_recommendation_means = recommendations_arch_4_summary.mean() + + property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values) + ]["total_floor_area"].mean() + + ######################## + # Overview + ######################## + overview_totals = recommendations_summary.sum() diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py index d1efce47..9170ab17 100644 --- a/etl/customers/slide_utils.py +++ b/etl/customers/slide_utils.py @@ -246,7 +246,7 @@ def create_powerpoint(data, save_location): prs.save(save_location) -def create_recommendations_summary(recommendations_df, properties_df, sap_target): +def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target): # Aggregate the impact of the recommendations # We want: # Total number of sap points @@ -259,13 +259,15 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target total_valuation_impact=("property_valuation_increase", "sum"), total_bill_savings=("energy_cost_savings", "sum"), total_cost=("estimated_cost", "sum"), - total_carbon=("co2_equivalent_savings", "sum") + total_carbon=("co2_equivalent_savings", "sum"), + adjusted_heat_demand=("adjusted_heat_demand", "sum") ).reset_index() - # Merge on current sap points + # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill recommendations_summary = recommendations_summary.merge( properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id", how="left" ) + recommendations_summary["expected_sap_points"] = ( recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"] ) @@ -274,4 +276,18 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target ) recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"] + if property_details_df is not None: + recommendations_summary = recommendations_summary.merge( + property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename( + columns={ + "id": "property_id", + "co2_emissions": "current_co2", + "adjusted_energy_consumption": "current_energy", + "energy_bill": "current_energy_bill" + } + ), + on="uprn", + how="left" + ) + return recommendations_summary diff --git a/recommendations/Costs.py b/recommendations/Costs.py index b2874f28..47844657 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -42,7 +42,22 @@ BATTERY_COST = 3500 # This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/ SMART_APPLIANCE_THERMOSTAT_COST = 400 -PROGRAMMER_COST = 200 +PROGRAMMER_COST = 120 +ROOM_THERMOSTAT_COST = 150 +TRVS_COST = 35 + +# Cost for TTZC +# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/ +# Based on the Nest system +TTZC_SMART_THERMOSTAT_COST = 205 +TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2 +TTZC_ELECTRICIAN_HOURLY_RATE = 45 +# Based on cost of a Nest temperature sensor +TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50 +TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install per sensor) +# Basedon an average cost of smart radiator values +TTZC_SMART_RADIATOR_VALUES = 50 +TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve) class Costs: @@ -998,3 +1013,69 @@ class Costs: "labour_hours": 0, "labour_days": 0, } + + def roomstat_programmer_trvs( + self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat + ): + """ + + :return: + """ + + total_cost = 0 + labour_hours = 0 + + if not has_programmer: + total_cost += PROGRAMMER_COST + labour_hours += 1 + + if not has_trvs: + total_cost += TRVS_COST * number_heated_rooms + labour_hours += 0.25 * number_heated_rooms + + if not has_room_thermostat: + total_cost += ROOM_THERMOSTAT_COST + labour_hours += 0.5 + + subtotal_before_vat = total_cost / (1 + self.VAT_RATE) + vat = total_cost - subtotal_before_vat + + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": labour_hours, + "labour_days": 1, + } + + def time_and_temperature_zone_control(self, number_heated_rooms): + + # The product costs are inclusive of VAT + product_costs = ( + TTZC_SMART_THERMOSTAT_COST + + TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms + + TTZC_SMART_RADIATOR_VALUES * number_heated_rooms + ) + labour_hours = ( + TTZC_SMART_THERMOSTAT_LABOUR_HOURS + + TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms + + TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms + ) + labour_costs = TTZC_ELECTRICIAN_HOURLY_RATE * labour_hours + # Add continency and preliminaries to the labour to account for the complexity of the job + labour_costs = labour_costs * (1 + self.CONTINGENCY + self.PRELIMINARIES) + + vat = labour_costs * self.VAT_RATE + + subtotal_before_vat = product_costs + labour_costs + total_cost = subtotal_before_vat + vat + + labour_days = np.ceil(labour_hours / 8) + + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": labour_hours, + "labour_days": labour_days, + } diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 81597f61..99b41469 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -27,6 +27,14 @@ class HeatingControlRecommender: self.recommend_high_heat_retention_controls() return + if heating_description in ["Boiler and radiators, mains gas"]: + # We can recommend roomstat programmer trvs + self.recommend_roomstat_programmer_trvs() + # We can also recommend time and temperature zone controls + self.recommend_time_temperature_zone_controls() + + return + def recommend_room_heaters_electric_controls(self): """ If the home has Room heaters, electric, we start by identifying potential heating controls that could @@ -105,3 +113,103 @@ class HeatingControlRecommender: # We don't implement any other recommendations right now return + + def recommend_roomstat_programmer_trvs(self): + """ + If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could + be upgraded, that would provide a practical impact. + + The criteria for recommending an upgrade to heating controls are (one of these must be true) + 1) There are no controls + 2) No programmer + 3) No room thermostat + 4) No TRVs + + + :return: + """ + + # We check if we have the conditions to recommend this upgrade + + needs_programmer = self.property.main_heating_controls["switch_system"] is None + needs_room_thermostat = self.property.main_heating_controls["thermostatic_control"] is None + needs_trvs = self.property.main_heating_controls["trvs"] is None + + can_recommend = ( + (self.property.main_heating_controls["no_control"] is not None) or + needs_programmer or + needs_room_thermostat or + needs_trvs + ) + + if not can_recommend: + return + + ending_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process() + # We use this to determine how we should be updating the config + simulation_config = check_simulation_difference( + new_config=ending_config, old_config=self.property.main_heating_controls + ) + # This upgrade will only take the heating system to average energy efficiency + # If the current system is below good, we make it good + if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]: + simulation_config["mainheatc_energy_eff_ending"] = "Good" + + has_programmer = not needs_programmer + has_room_thermostat = not needs_room_thermostat + has_trvs = not needs_trvs + + self.recommendation.append( + { + "description": "upgrade heating controls to Room thermostat, programmer and TRVs", + **self.costs.roomstat_programmer_trvs( + number_heated_rooms=int(self.property.data["number-heated-rooms"]), + has_programmer=has_programmer, + has_room_thermostat=has_room_thermostat, + has_trvs=has_trvs + ), + "simulation_config": simulation_config + } + ) + + return + + def recommend_time_temperature_zone_controls(self): + """ + If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced + and more efficient control system than the standard controls that come with a boiler. However, it may come + with a higher cost and more involved usage + :return: + """ + + # We check if the efficiency of the current heating controls is good or below, and + + # Conditions for installation are as follows: + # 1) The current heating controls are not time and temperature zone controls + # 2) The current heating controls are not already at 'Very Good' or above + + if ( + (self.property["thermostatic_control"] == "time and temperature zone control") or + (self.property.data["mainheatc-energy-eff"] in ["Very Good"]) + ): + # No recommendation needed + return + + ending_config = MainheatControlAttributes("Time and temperature zone control").process() + + # We use this to determine how we should be updating the config + simulation_config = check_simulation_difference( + new_config=ending_config, old_config=self.property.main_heating_controls + ) + + # If the current system is below very good, we make it very good + if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]: + simulation_config["mainheatc_energy_eff_ending"] = "Very Good" + + self.recommendation.append( + { + "description": "upgrade heating controls to Room thermostat, programmer and TRVs", + **self.costs.time_and_temperature_zone_control(), + "simulation_config": simulation_config + } + ) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 11ae3da6..6467bd2f 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -26,6 +26,11 @@ class HeatingRecommender: self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False) return + # if the property has mains heating with boiler and radiators, we recommend optimal heating controls + if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]: + self.recommend_roomstat_programmer_trvs(phase=phase) + return + @staticmethod def check_simulation_difference(old_config, new_config): """ @@ -182,3 +187,15 @@ class HeatingRecommender: ) self.recommendations.extend(recommendations) + + def recommend_roomstat_programmer_trvs(self, phase): + """ + + :param phase: + :return: + """ + # We recommend the heating controls + controls_recommender = HeatingControlRecommender(self.property) + controls_recommender.recommend(heating_description="Boiler and radiators, mains gas") + + controls_recommender.recommendation From 45552f5e06d3b814729cc57b6ca4329d19a8c31e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 14:39:28 +0100 Subject: [PATCH 167/248] Added costing for boiler --- recommendations/Costs.py | 51 ++++++++++++ recommendations/HeatingControlRecommender.py | 6 +- recommendations/HeatingRecommender.py | 83 +++++++++++++++++++- recommendations/Recommendations.py | 3 + 4 files changed, 137 insertions(+), 6 deletions(-) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 47844657..e5ceb0c0 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -59,6 +59,26 @@ TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install pe TTZC_SMART_RADIATOR_VALUES = 50 TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve) +# Low carbon combi boiler - median value based on £2200 - £3000 range +LOW_CARBON_COMBI_BOILER = 2200 + +# boiler prices based on +# https://www.greenmatch.co.uk/boilers/30kw-boiler +# https://www.greenmatch.co.uk/boilers/35kw-boiler +# https://www.greenmatch.co.uk/boilers/40kw-boiler +# These are exclusive of installation costs +COMBI_BOILER_COSTS = { + "30kw": 1550, + "35kw": 1610, + "40kw": 1625 +} + +CONVENTIONAL_BOILER_COSTS = { + "30kw": 1117, + "35kw": 1546, + "40kw": 1776 +} + class Costs: """ @@ -1079,3 +1099,34 @@ class Costs: "labour_hours": labour_hours, "labour_days": labour_days, } + + def low_carbon_boiler(self, is_combi, size): + """ + Based on a basic estimate of median value £2600 to install a low carbon combi boiler + :return: + """ + + unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size] + # The unit cost is the cost without VAT + # We now need to estimate the cost of the works + labour_days = 2 + labour_rate = 500 + + # Average cost of installation is 1 (maybe 2days) at £300 per day + # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/ + # To be pessimistic, assume 2 days work and £500 day rate + labour_cost = labour_rate * self.labour_adjustment_factor * labour_days + # Add contingency and preliminaries + labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES) + vat = labour_cost * self.VAT_RATE + + subtotal_before_vat = unit_cost + labour_cost + total_cost = subtotal_before_vat + vat + + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": labour_days * 8, + "labour_days": labour_days, + } diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 99b41469..547ea497 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -189,7 +189,7 @@ class HeatingControlRecommender: # 2) The current heating controls are not already at 'Very Good' or above if ( - (self.property["thermostatic_control"] == "time and temperature zone control") or + (self.property.main_heating_controls["thermostatic_control"] == "time and temperature zone control") or (self.property.data["mainheatc-energy-eff"] in ["Very Good"]) ): # No recommendation needed @@ -209,7 +209,9 @@ class HeatingControlRecommender: self.recommendation.append( { "description": "upgrade heating controls to Room thermostat, programmer and TRVs", - **self.costs.time_and_temperature_zone_control(), + **self.costs.time_and_temperature_zone_control( + number_heated_rooms=int(self.property.data["number-heated-rooms"]) + ), "simulation_config": simulation_config } ) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 6467bd2f..c7064274 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -28,7 +28,7 @@ class HeatingRecommender: # if the property has mains heating with boiler and radiators, we recommend optimal heating controls if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]: - self.recommend_roomstat_programmer_trvs(phase=phase) + self.recommend_boiler_upgrades(phase=phase) return @staticmethod @@ -188,14 +188,89 @@ class HeatingRecommender: self.recommendations.extend(recommendations) - def recommend_roomstat_programmer_trvs(self, phase): - """ + @staticmethod + def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms): + # Step 1: Base size estimation based on property type (as a starting point) + base_size = { + 'Flat': 25, + 'House': 30, + 'Maisonette': 28, + 'Bungalow': 27 + } + # Step 2: Calculate the volume of the property + volume = floor_area * floor_height + + # Step 3: Adjust base size for built form (to account for heat retention) + form_adjustment = { + 'Mid-Terrace': 0, + 'End-Terrace': 2, + 'Semi-Detached': 4, + 'Detached': 6 + } + + # Step 4: Further adjust for the total volume and number of heated rooms + volume_adjustment = (volume / 100) # Simplified adjustment factor for volume + rooms_adjustment = (num_heated_rooms - 5) * 0.5 # Assuming base case of 5 rooms + + # Calculate the estimated boiler size + estimated_size = base_size[property_type] + form_adjustment[built_form] + volume_adjustment + rooms_adjustment + + # Step 5: Align with available boiler sizes and ensure it does not exceed 35kW, as it's rare to need more + available_sizes = [30, 35, 40, 45, 50] + estimated_size = min(max(estimated_size, 30), 40) # Ensure within 30kW to 35kW range + + # Find the closest available size (in this case, either rounding up or down to align with 30 or 35) + closest_size = min(available_sizes, key=lambda x: abs(x - estimated_size)) + + return closest_size + + def recommend_boiler_upgrades(self, phase): + """ + This boiler recommendation will only recommend a like-for-like upgrade, since changing the system + is generally more expensive :param phase: :return: """ + + # We now recommend boiler upgrades, if applicable + if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]: + boiler_size = self.estimate_boiler_size( + property_type=self.property.data["property-type"], + built_form=self.property.data["built-form"], + floor_area=self.property.floor_area, + floor_height=self.property.floor_height, + num_heated_rooms=self.property.data["number-heated-rooms"], + ) + + # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler + is_combi = self.property.hotwater["clean_description"] in ["From main system"] + if is_combi: + description = "Upgrade to a low carbon combi boiler" + else: + description = "Upgrade to a low carbon boiler" + + self.recommendations.append( + { + "phase": phase, + "parts": [ + # TODO + ], + "type": "heating", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") + } + ) + # We recommend the heating controls controls_recommender = HeatingControlRecommender(self.property) controls_recommender.recommend(heating_description="Boiler and radiators, mains gas") + # We may have 2 recommendations from the heating controls - controls_recommender.recommendation + # The heating controls recommendation is distrinct from the boiler upgrade recommendation + # We insert phase into the recommendations for heating controls + for recommendation in controls_recommender.recommendation: + recommendation["phase"] = phase diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 944fec7a..d9a0a0fd 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -110,6 +110,9 @@ class Recommendations: self.heating_recommender.recommend(phase=phase) if self.heating_recommender.recommendations: property_recommendations.append(self.heating_recommender.recommendations) + # We check if we have distinct heating and heating controls recommendations + # If so, we increment by 2 (one of the heating system, one for the heating controls) + # otherwise we incremenet by 1 phase += 1 # Hot water From 09bbeaecae8156faedf090a28bfe0bcae231f0d2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 14:57:11 +0100 Subject: [PATCH 168/248] incorporate heating and heating control recommendations --- recommendations/HeatingRecommender.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index c7064274..676a4b06 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -233,6 +233,8 @@ class HeatingRecommender: :return: """ + recommendation_phase = phase + # We now recommend boiler upgrades, if applicable if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]: boiler_size = self.estimate_boiler_size( @@ -252,7 +254,7 @@ class HeatingRecommender: self.recommendations.append( { - "phase": phase, + "phase": recommendation_phase, "parts": [ # TODO ], @@ -261,16 +263,21 @@ class HeatingRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, + "simulation_config": {"mainheat_energy_eff_ending": "Good"}, **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") } ) + # We increment the recommendation phase, in the case of us having heating control recommendations + recommendation_phase += 1 + # We recommend the heating controls controls_recommender = HeatingControlRecommender(self.property) controls_recommender.recommend(heating_description="Boiler and radiators, mains gas") # We may have 2 recommendations from the heating controls - # The heating controls recommendation is distrinct from the boiler upgrade recommendation - # We insert phase into the recommendations for heating controls - for recommendation in controls_recommender.recommendation: - recommendation["phase"] = phase + if controls_recommender.recommendation: + # The heating controls recommendation is distrinct from the boiler upgrade recommendation + # We insert phase into the recommendations for heating controls + for recommendation in controls_recommender.recommendation: + recommendation["phase"] = recommendation_phase From 9130ad55fffc21858ca7061d26a2f6ecb8d66e3d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 14:59:42 +0100 Subject: [PATCH 169/248] Added missing controls to output --- recommendations/HeatingRecommender.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 676a4b06..9658aaa3 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -281,3 +281,5 @@ class HeatingRecommender: # We insert phase into the recommendations for heating controls for recommendation in controls_recommender.recommendation: recommendation["phase"] = recommendation_phase + + self.recommendations.extend(controls_recommender.recommendation) From a9c2bf1b9c0be1192edbeb50ba01401d1e55578f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 15:06:44 +0100 Subject: [PATCH 170/248] added correct incrementing of phase --- recommendations/HeatingControlRecommender.py | 8 ++++++++ recommendations/Recommendations.py | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 547ea497..e224f243 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -161,6 +161,7 @@ class HeatingControlRecommender: self.recommendation.append( { + "type": "heating_control", "description": "upgrade heating controls to Room thermostat, programmer and TRVs", **self.costs.roomstat_programmer_trvs( number_heated_rooms=int(self.property.data["number-heated-rooms"]), @@ -168,6 +169,9 @@ class HeatingControlRecommender: has_room_thermostat=has_room_thermostat, has_trvs=has_trvs ), + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, "simulation_config": simulation_config } ) @@ -208,10 +212,14 @@ class HeatingControlRecommender: self.recommendation.append( { + "type": "heating_control", "description": "upgrade heating controls to Room thermostat, programmer and TRVs", **self.costs.time_and_temperature_zone_control( number_heated_rooms=int(self.property.data["number-heated-rooms"]) ), + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, "simulation_config": simulation_config } ) diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index d9a0a0fd..902023dc 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -113,7 +113,9 @@ class Recommendations: # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating controls) # otherwise we incremenet by 1 - phase += 1 + max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations]) + amount_to_increment = max_used_phase - phase + 1 + phase += amount_to_increment # Hot water if "hot_water" not in self.exclusions: From 2234269ca62611c9f0285acc0f79491ce98cf277 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 15:14:19 +0100 Subject: [PATCH 171/248] added simulation --- backend/Property.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index d97ce8cf..82108bbb 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -344,7 +344,7 @@ class Property: else: output["glazed_type_ending"] = "double glazing installed during or after 2002" - if recommendation["type"] in ["heating", "hot_water_tank_insulation"]: + if recommendation["type"] in ["heating", "hot_water_tank_insulation", "heating_control"]: # We update the data, as defined in the recommendaton simulation_config = recommendation["simulation_config"] @@ -364,7 +364,8 @@ class Property: "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "loft_insulation", "room_roof_insulation", "flat_roof_insulation", "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation", - "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation" + "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation", + "heating_control", ]: raise NotImplementedError("Implement me, given type %s" % recommendation["type"]) From f2cec8de11305c7d763a712050f0da685001bd7f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 16:30:45 +0100 Subject: [PATCH 172/248] fixed description for ttaz --- recommendations/HeatingControlRecommender.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index e224f243..7010ad53 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -162,6 +162,7 @@ class HeatingControlRecommender: self.recommendation.append( { "type": "heating_control", + "parts": [], "description": "upgrade heating controls to Room thermostat, programmer and TRVs", **self.costs.roomstat_programmer_trvs( number_heated_rooms=int(self.property.data["number-heated-rooms"]), @@ -213,7 +214,8 @@ class HeatingControlRecommender: self.recommendation.append( { "type": "heating_control", - "description": "upgrade heating controls to Room thermostat, programmer and TRVs", + "parts": [], + "description": "Upgrade heating controls to Time and Temperature Zone Controls", **self.costs.time_and_temperature_zone_control( number_heated_rooms=int(self.property.data["number-heated-rooms"]) ), From 519dc6cfcb31ce4093ae0e6cace03ba30920e5e7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 19:17:27 +0100 Subject: [PATCH 173/248] added off-gas property recommendations --- backend/app/plan/router.py | 1 + etl/customers/gla_croydon_demo/asset_list.py | 42 +++- etl/customers/gla_croydon_demo/slides.py | 200 ++++++++++++++++++- recommendations/HeatingControlRecommender.py | 2 +- recommendations/HeatingRecommender.py | 12 +- 5 files changed, 247 insertions(+), 10 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 50b8a837..4868749d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -389,6 +389,7 @@ async def trigger_plan(body: PlanTriggerRequest): # Commit final changes session.commit() + except IntegrityError: logger.error("Database integrity error occurred", exc_info=True) session.rollback() diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 3a3f02a3..52e9422c 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -4,6 +4,23 @@ from utils.s3 import save_csv_to_s3 USER_ID = 8 PORTFOLIO_ID = 67 +archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304, + 100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913, + 200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083] +archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603, + 100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351, + 100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350, + 100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337, + 100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528, + 100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438, + 100020576459, 100020665643, 100020665654, 100022917303] +archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485, + 100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611, + 100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980, + 100020692380, 100020581690] +archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537, + 200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228] + def app(): """ @@ -84,14 +101,15 @@ def app(): archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2" # Archetype 3: defined below: - # 1) EPC F + # 1) EPC E or below # 2) Solid brick wall # 3) House # 4) Pitched roof with no insulation # Just 7 properties (more expensive to retrofit) archetype_3_sample = epc_data[ epc_data["PROPERTY_TYPE"].isin(["House"]) & - (epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])) & + (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"])) & + epc_data["WALLS_DESCRIPTION"].isin(["Solid brick, as built, no insulation (assumed)"]) & epc_data["ROOF_DESCRIPTION"].isin( [ "Pitched, no insulation", @@ -119,7 +137,6 @@ def app(): archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy() archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4" - # 104 total properties asset_list = pd.concat( [ archetype_1_sample_asset_list, @@ -152,6 +169,25 @@ def app(): ] ) ] + # We have slightly too many properties, so we take a random sample of each archetype + # achetype_1_size = 20 + # achetype_2_size = 46 + # achetype_3_size = 23 + # achetype_4_size = 13 + # archetype_1_uprns = asset_list[asset_list["archetype"] == "Archetype 1"]["uprn"].sample( + # int(achetype_1_size) + # ).tolist() + # archetype_2_uprns = asset_list[asset_list["archetype"] == "Archetype 2"]["uprn"].sample( + # int(achetype_2_size) + # ).tolist() + # archetype_3_uprns = asset_list[asset_list["archetype"] == "Archetype 3"]["uprn"].sample( + # int(achetype_3_size) + # ).tolist() + # archetype_4_uprns = asset_list[asset_list["archetype"] == "Archetype 4"]["uprn"].sample( + # int(achetype_4_size) + # ).tolist() + uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns + asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)] filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv" save_csv_to_s3( diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py index 1d217226..e6c4b5b8 100644 --- a/etl/customers/gla_croydon_demo/slides.py +++ b/etl/customers/gla_croydon_demo/slides.py @@ -27,8 +27,24 @@ SAP_TARGET_1 = 69 SAP_TARGET_2 = 100 CUSTOMER_KEY = "gla-demo" +# Sample UPRNS +archetype_1_sample = ['100020618076', '100020619814', '100020581792', '100020671205', '100020585305', '100020606853', + '100020625813', '100020618042', '200001188304', '200001187196', '100020603026', '100020604138', + '100020615039', '200001188299', '100020618060', '200001192253'] -def app(): +archetype_2_sample = ['100020616325', '100020665634', '100020665654', '100020665638', '100020587936', '100020587905', + '100020665645', '100020625597', '100022907528', '100020665630', '100020624348', '10001008257', + '100020666735', '100020698027', '100020624351', '100020665656', '100020666716', '100020665632', + '100020666715', '100020645639', '200001191309', '100020625451', '100020624347', '100020665658', + '100020585002', '100022917303', '100020665650', '100020667737', '100020620659', '100022904981', + '100020642337', '100020657902', '100020615603', '100020626800', '100020665647', '100020665643'] + +archetype_3_sample = ['100020607980', '200001193193', '100020581690', '100020665611'] +archetype_4_sample = ['100020631683', '100020607667', '100020660228', '100020605116', '200001187539', '100020582907', + '100020610165', '100020650607', '100020655500', '100020598277', '100020642537'] + + +def scenario_1(): # Connect to database session = sessionmaker(bind=db_engine)() @@ -36,9 +52,7 @@ def app(): # Get the data we need ######################################################################## - # TODO: Update to portfolio desired - # portfolio_id = PORTFOLIO_ID_1 - portfolio_id = PORTFOLIO_ID_2 + portfolio_id = PORTFOLIO_ID_1 # Get the asset list asset_list = read_csv_from_s3( @@ -157,3 +171,181 @@ def app(): # Overview ######################## overview_totals = recommendations_summary.sum() + + +def make_sample(): + # sample_proportion = 67 / 102 + # Get the asset list + asset_list = read_csv_from_s3( + "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv" + ) + asset_list = pd.DataFrame(asset_list) + + # From the asset list, we deduce how many properties we need + archetype_1_sample_size = 16 + archetype_2_sample_size = 36 + archetype_3_sample_size = 4 + archetype_4_sample_size = 11 + + # We take the sample and we'll keep the uprns static + archetype_1_sample = asset_list[ + asset_list["archetype"] == "Archetype 1" + ].sample(archetype_1_sample_size)["uprn"].to_list() + + archetype_2_sample = asset_list[ + asset_list["archetype"] == "Archetype 2" + ].sample(archetype_2_sample_size)["uprn"].to_list() + + archetype_3_sample = asset_list[ + asset_list["archetype"] == "Archetype 3" + ].sample(archetype_3_sample_size)["uprn"].to_list() + + archetype_4_sample = asset_list[ + asset_list["archetype"] == "Archetype 4" + ].sample(archetype_4_sample_size)["uprn"].to_list() + + +def scenario_2(): + # Connect to database + session = sessionmaker(bind=db_engine)() + + ######################################################################## + # Get the data we need + ######################################################################## + + portfolio_id = PORTFOLIO_ID_2 + + # Get the asset list + asset_list = read_csv_from_s3( + "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv" + ) + asset_list = pd.DataFrame(asset_list) + + sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample + + # Filter on sample uprns + asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)] + + # Get the properties for the portfolio + properties = get_properties_with_default_recommendations(session, portfolio_id) + properties_df = pd.DataFrame(properties) + properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)] + + # We now pull the data for the property details + property_details = get_property_details_by_portfolio_id(session, portfolio_id) + property_details_df = pd.DataFrame(property_details) + property_details_df = property_details_df[property_details_df["property_id"].isin(properties_df["id"].values)] + # We estimate bills based on the adjusted_energy_consumption + property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply( + lambda x: AnnualBillSavings.calculate_annual_bill(x) + ) + # Merge on uprn + property_details_df = property_details_df.merge( + properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}), + on="property_id" + ) + + plans = get_plan_by_portfolio_id(session, portfolio_id) + plans_df = pd.DataFrame(plans) + + # Unnest the recommendations. Each recommendation is a list of dictionaries + recommendations_exploded = properties_df["recommendations"].explode().tolist() + recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)]) + # Add uprn on + recommendations_df = recommendations_df.merge( + properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}), + how="left", + on="property_id" + ) + + recommendations_summary = create_recommendations_summary( + recommendations_df, + properties_df, + property_details_df, + SAP_TARGET_1 + ) + + # Calculate % changes of energ, co2 and abs + recommendations_summary["carbon_percent_change"] = ( + recommendations_summary["total_carbon"] / recommendations_summary["current_co2"] + ) + + recommendations_summary["energy_percent_change"] = ( + recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"] + ) + + recommendations_summary["bills_percent_change"] = ( + recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"] + ) + + ######################## + # Overview + ######################## + overview_totals = recommendations_summary.sum() + overview_means = recommendations_summary.mean() + + ######################## + # Measures + ######################## + measures_count = recommendations_df.groupby("type")["id"].count().reset_index() + + z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)] + + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]["type"].value_counts() + + # Summary information by each archetype + ######################## + # Archetype 1 + ######################## + archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"] + recommendations_arch_1_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_1_recommendation_min = recommendations_arch_1_summary.min() + arch_1_recommendation_max = recommendations_arch_1_summary.max() + arch_1_recommendation_means = recommendations_arch_1_summary.mean() + + ######################## + # Archetype 2 + ######################## + archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"] + recommendations_arch_2_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_2_recommendation_min = recommendations_arch_2_summary.min() + arch_2_recommendation_max = recommendations_arch_2_summary.max() + arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2) + + ######################## + # Archetype 3 + ######################## + archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"] + recommendations_arch_3_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_3_recommendation_min = recommendations_arch_3_summary.min() + arch_3_recommendation_max = recommendations_arch_3_summary.max() + arch_3_recommendation_means = recommendations_arch_3_summary.mean() + + ######################## + # Archetype 4 + ######################## + archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"] + recommendations_arch_4_summary = recommendations_summary[ + recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values) + ] + + # Take the mean, median and maximum of each value + arch_4_recommendation_min = recommendations_arch_4_summary.min() + arch_4_recommendation_max = recommendations_arch_4_summary.max() + arch_4_recommendation_means = recommendations_arch_4_summary.mean() + + property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values) + ]["total_floor_area"].mean() diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 7010ad53..95b5e3b1 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -215,7 +215,7 @@ class HeatingControlRecommender: { "type": "heating_control", "parts": [], - "description": "Upgrade heating controls to Time and Temperature Zone Controls", + "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves", **self.costs.time_and_temperature_zone_control( number_heated_rooms=int(self.property.data["number-heated-rooms"]) ), diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 9658aaa3..8b20c0cd 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -19,9 +19,17 @@ class HeatingRecommender: self.recommendations = [] # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system - if self.property.main_heating["clean_description"] in [ + + has_electric_heating_description = self.property.main_heating["clean_description"] in [ "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators" - ]: + ] + + no_heating_no_mains = ( + self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and + not self.property.data["mains-gas-flag"] + ) + + if has_electric_heating_description or no_heating_no_mains: # Recommend high heat retention storage heaters self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False) return From 47ebf866ee141c8ed91a7191b5bb75ef49246950 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Apr 2024 20:02:37 +0100 Subject: [PATCH 174/248] fixed sample in slides --- backend/app/plan/router.py | 1 - etl/customers/gla_croydon_demo/slides.py | 35 +++++++++++---------- recommendations/HeatingRecommender.py | 39 ++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4868749d..50b8a837 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -389,7 +389,6 @@ async def trigger_plan(body: PlanTriggerRequest): # Commit final changes session.commit() - except IntegrityError: logger.error("Database integrity error occurred", exc_info=True) session.rollback() diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py index e6c4b5b8..cbd1f7e4 100644 --- a/etl/customers/gla_croydon_demo/slides.py +++ b/etl/customers/gla_croydon_demo/slides.py @@ -28,20 +28,22 @@ SAP_TARGET_2 = 100 CUSTOMER_KEY = "gla-demo" # Sample UPRNS -archetype_1_sample = ['100020618076', '100020619814', '100020581792', '100020671205', '100020585305', '100020606853', - '100020625813', '100020618042', '200001188304', '200001187196', '100020603026', '100020604138', - '100020615039', '200001188299', '100020618060', '200001192253'] +archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060', + '100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489', + '100020588913'] -archetype_2_sample = ['100020616325', '100020665634', '100020665654', '100020665638', '100020587936', '100020587905', - '100020665645', '100020625597', '100022907528', '100020665630', '100020624348', '10001008257', - '100020666735', '100020698027', '100020624351', '100020665656', '100020666716', '100020665632', - '100020666715', '100020645639', '200001191309', '100020625451', '100020624347', '100020665658', - '100020585002', '100022917303', '100020665650', '100020667737', '100020620659', '100022904981', - '100020642337', '100020657902', '100020615603', '100020626800', '100020665647', '100020665643'] +archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348', + '100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597', + '100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303', + '100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405', + '100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643'] -archetype_3_sample = ['100020607980', '200001193193', '100020581690', '100020665611'] -archetype_4_sample = ['100020631683', '100020607667', '100020660228', '100020605116', '200001187539', '100020582907', - '100020610165', '100020650607', '100020655500', '100020598277', '100020642537'] +archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611', + '100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980', + '200001185785', '100020616446', '100020692380'] + +archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277', + '100020650607', '100020605116', '100020650603'] def scenario_1(): @@ -182,10 +184,11 @@ def make_sample(): asset_list = pd.DataFrame(asset_list) # From the asset list, we deduce how many properties we need - archetype_1_sample_size = 16 - archetype_2_sample_size = 36 - archetype_3_sample_size = 4 - archetype_4_sample_size = 11 + # Need to figure out the sizes + archetype_1_sample_size = 13 + archetype_2_sample_size = 30 + archetype_3_sample_size = 15 + archetype_4_sample_size = 9 # We take the sample and we'll keep the uprns static archetype_1_sample = asset_list[ diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 8b20c0cd..9d2e99e3 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -4,6 +4,7 @@ from recommendations.Costs import Costs from recommendations.recommendation_utils import check_simulation_difference from backend.Property import Property from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes +from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes from recommendations.HeatingControlRecommender import HeatingControlRecommender @@ -35,7 +36,14 @@ class HeatingRecommender: return # if the property has mains heating with boiler and radiators, we recommend optimal heating controls - if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]: + has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"] + + # We also check that the property doesn't have a heating system, but it has access to the mains gas + no_heating_has_mains = self.property.main_heating["clean_description"] in [ + 'No system present, electric heaters assumed' + ] and self.property.data["mains-gas-flag"] + + if has_boiler or no_heating_has_mains: self.recommend_boiler_upgrades(phase=phase) return @@ -254,12 +262,37 @@ class HeatingRecommender: ) # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler - is_combi = self.property.hotwater["clean_description"] in ["From main system"] + hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"] + access_to_mains_no_system = self.property.main_heating["clean_description"] in [ + 'No system present, electric heaters assumed' + ] and self.property.data["mains-gas-flag"] + is_combi = hotwater_from_mains or access_to_mains_no_system if is_combi: description = "Upgrade to a low carbon combi boiler" else: description = "Upgrade to a low carbon boiler" + simulation_config = {"mainheat_energy_eff_ending": "Good"} + if access_to_mains_no_system: + # Installation of a boiler improves the hot water system so we need to reflect this in + # the outcome of the recommendation + heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process() + hotwater_ending_config = HotWaterAttributes("From main system").process() + + heating_simulation_config = check_simulation_difference( + new_config=heating_ending_config, old_config=self.property.main_heating + ) + hotwater_simulation_config = check_simulation_difference( + new_config=hotwater_ending_config, old_config=self.property.hotwater + ) + + simulation_config = { + **simulation_config, + **heating_simulation_config, + **hotwater_simulation_config, + "hot_water_energy_eff_ending": "Good" + } + self.recommendations.append( { "phase": recommendation_phase, @@ -271,7 +304,7 @@ class HeatingRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "simulation_config": {"mainheat_energy_eff_ending": "Good"}, + "simulation_config": simulation_config, **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") } ) From 93830f90bb785a3f7f17e77a1ef8285d4aed966e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 4 Apr 2024 16:35:14 +0100 Subject: [PATCH 175/248] removed low carbon from boiler terminology --- backend/ml_models/AnnualBillSavings.py | 6 +- etl/customers/gla_croydon_demo/slides.py | 424 ++++++++++++++++++++++- recommendations/HeatingRecommender.py | 15 +- 3 files changed, 431 insertions(+), 14 deletions(-) diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 9be9d78a..99d67126 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -19,7 +19,8 @@ class AnnualBillSavings: PRICE_FACTOR = 0.09549999999999999 # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT - DAILY_STANDARD_CHARGE = 0.3143 + DAILY_STANDARD_CHARGE_GAS = 0.3143 + DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601 EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"] @@ -45,11 +46,12 @@ class AnnualBillSavings: def calculate_annual_bill(cls, kwh): """ This method will estimate the total annual bill for a property + It assumed gas & electricity are used :param kwh: The total kwh consumption :return: An estimate for annual bill """ - return cls.PRICE_FACTOR * kwh + cls.DAILY_STANDARD_CHARGE * 365 + return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365) @classmethod def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating): diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py index cbd1f7e4..9f791bbd 100644 --- a/etl/customers/gla_croydon_demo/slides.py +++ b/etl/customers/gla_croydon_demo/slides.py @@ -112,6 +112,49 @@ def scenario_1(): recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"] ) + ######################## + # Overview + ######################## + overview_totals = recommendations_summary.sum() + overview_means = recommendations_summary.mean() + + ######################## + # Measures + ######################## + measures_count = recommendations_df.groupby("type")["id"].count().reset_index() + wall_insulation_measures = measures_count[ + measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"]) + ]["id"].sum() + ventilation_measures = measures_count[ + measures_count["type"].isin(["mechanical_ventilation"]) + ]["id"].sum() + roof_insulation_measures = measures_count[ + measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"]) + ]["id"].sum() + floor_insulation_measures = measures_count[ + measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"]) + ]["id"].sum() + windows = measures_count[ + measures_count["type"].isin(["windows_glazing"]) + ]["id"].sum() + heating = measures_count[ + measures_count["type"].isin(["heating"]) + ]["id"].sum() + heating_controls = measures_count[ + measures_count["type"].isin(["heating_control"]) + ]["id"].sum() + solar = measures_count[ + measures_count["type"].isin(["solar_pv"]) + ]["id"].sum() + other = measures_count[ + ~measures_count["type"].isin([ + "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", + "loft_insulation", "flat_roof_insulation", "solid_floor_insulation", + "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv", + "mechanical_ventilation" + ]) + ]["id"].sum() + # Summary information by each archetype ######################## # Archetype 1 @@ -121,10 +164,54 @@ def scenario_1(): recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values) ] + arch_1_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values) + ] + arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value - arch_1_recommendation_min = recommendations_arch_1_summary.min() - arch_1_recommendation_max = recommendations_arch_1_summary.max() - arch_1_recommendation_means = recommendations_arch_1_summary.mean() + cols_to_keep = ["total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand", + "energy_percent_change", "carbon_percent_change", "bills_percent_change"] + arch_1_recommendation_min = recommendations_arch_1_summary.min()[cols_to_keep] + arch_1_recommendation_max = recommendations_arch_1_summary.max()[cols_to_keep] + arch_1_recommendation_means = recommendations_arch_1_summary.mean()[cols_to_keep] + arch_1_totals = recommendations_arch_1_summary.sum()[cols_to_keep] + + annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: " + f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: " + f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_1_recommendation_min['adjusted_heat_demand']} - " + f"{arch_1_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_1_recommendation_min['energy_percent_change']} - " + f"{arch_1_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: " + f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_1_recommendation_min['carbon_percent_change']} - " + f"{arch_1_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_1_recommendation_min['total_bill_savings']} - " + f"{arch_1_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_1_recommendation_min['bills_percent_change']} - " + f"{arch_1_recommendation_max['bills_percent_change']}") ######################## # Archetype 2 @@ -134,11 +221,53 @@ def scenario_1(): recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values) ] + arch_2_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values) + ] + arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_2_recommendation_min = recommendations_arch_2_summary.min() arch_2_recommendation_max = recommendations_arch_2_summary.max() arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2) + total_cost = recommendations_arch_2_summary["total_cost"].sum() + annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: " + f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: " + f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_2_recommendation_min['adjusted_heat_demand']} - " + f"{arch_2_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_2_recommendation_min['energy_percent_change']} - " + f"{arch_2_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: " + f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_2_recommendation_min['carbon_percent_change']} - " + f"{arch_2_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_2_recommendation_min['total_bill_savings']} - " + f"{arch_2_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_2_recommendation_min['bills_percent_change']} - " + f"{arch_2_recommendation_max['bills_percent_change']}") + ######################## # Archetype 3 ######################## @@ -147,11 +276,53 @@ def scenario_1(): recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values) ] + arch_3_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values) + ] + arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_3_recommendation_min = recommendations_arch_3_summary.min() arch_3_recommendation_max = recommendations_arch_3_summary.max() arch_3_recommendation_means = recommendations_arch_3_summary.mean() + total_cost = recommendations_arch_3_summary["total_cost"].sum() + annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: " + f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: " + f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_3_recommendation_min['adjusted_heat_demand']} - " + f"{arch_3_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_3_recommendation_min['energy_percent_change']} - " + f"{arch_3_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: " + f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_3_recommendation_min['carbon_percent_change']} - " + f"{arch_3_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_3_recommendation_min['total_bill_savings']} - " + f"{arch_3_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_3_recommendation_min['bills_percent_change']} - " + f"{arch_3_recommendation_max['bills_percent_change']}") + ######################## # Archetype 4 ######################## @@ -160,14 +331,52 @@ def scenario_1(): recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values) ] + arch_4_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values) + ] + arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_4_recommendation_min = recommendations_arch_4_summary.min() arch_4_recommendation_max = recommendations_arch_4_summary.max() arch_4_recommendation_means = recommendations_arch_4_summary.mean() - property_details_df[ - property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values) - ]["total_floor_area"].mean() + total_cost = recommendations_arch_4_summary["total_cost"].sum() + annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: " + f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: " + f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_4_recommendation_min['adjusted_heat_demand']} - " + f"{arch_4_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_4_recommendation_min['energy_percent_change']} - " + f"{arch_4_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: " + f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_4_recommendation_min['carbon_percent_change']} - " + f"{arch_4_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_4_recommendation_min['total_bill_savings']} - " + f"{arch_4_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_4_recommendation_min['bills_percent_change']} - " + f"{arch_4_recommendation_max['bills_percent_change']}") ######################## # Overview @@ -291,6 +500,38 @@ def scenario_2(): # Measures ######################## measures_count = recommendations_df.groupby("type")["id"].count().reset_index() + wall_insulation_measures = measures_count[ + measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"]) + ]["id"].sum() + ventilation_measures = measures_count[ + measures_count["type"].isin(["mechanical_ventilation"]) + ]["id"].sum() + roof_insulation_measures = measures_count[ + measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"]) + ]["id"].sum() + floor_insulation_measures = measures_count[ + measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"]) + ]["id"].sum() + windows = measures_count[ + measures_count["type"].isin(["windows_glazing"]) + ]["id"].sum() + heating = measures_count[ + measures_count["type"].isin(["heating"]) + ]["id"].sum() + heating_controls = measures_count[ + measures_count["type"].isin(["heating_control"]) + ]["id"].sum() + solar = measures_count[ + measures_count["type"].isin(["solar_pv"]) + ]["id"].sum() + other = measures_count[ + ~measures_count["type"].isin([ + "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", + "loft_insulation", "flat_roof_insulation", "solid_floor_insulation", + "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv", + "mechanical_ventilation" + ]) + ]["id"].sum() z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)] @@ -305,11 +546,54 @@ def scenario_2(): recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values) ] + arch_1_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values) + ] + arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_1_recommendation_min = recommendations_arch_1_summary.min() arch_1_recommendation_max = recommendations_arch_1_summary.max() arch_1_recommendation_means = recommendations_arch_1_summary.mean() + arch_1_totals = recommendations_arch_1_summary.sum() + + annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: " + f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: " + f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_1_recommendation_min['adjusted_heat_demand']} - " + f"{arch_1_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_1_recommendation_min['energy_percent_change']} - " + f"{arch_1_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: " + f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_1_recommendation_min['carbon_percent_change']} - " + f"{arch_1_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_1_recommendation_min['total_bill_savings']} - " + f"{arch_1_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_1_recommendation_min['bills_percent_change']} - " + f"{arch_1_recommendation_max['bills_percent_change']}") + ######################## # Archetype 2 ######################## @@ -318,11 +602,53 @@ def scenario_2(): recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values) ] + arch_2_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values) + ] + arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_2_recommendation_min = recommendations_arch_2_summary.min() arch_2_recommendation_max = recommendations_arch_2_summary.max() arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2) + total_cost = recommendations_arch_2_summary["total_cost"].sum() + annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: " + f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: " + f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_2_recommendation_min['adjusted_heat_demand']} - " + f"{arch_2_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_2_recommendation_min['energy_percent_change']} - " + f"{arch_2_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: " + f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_2_recommendation_min['carbon_percent_change']} - " + f"{arch_2_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_2_recommendation_min['total_bill_savings']} - " + f"{arch_2_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_2_recommendation_min['bills_percent_change']} - " + f"{arch_2_recommendation_max['bills_percent_change']}") + ######################## # Archetype 3 ######################## @@ -331,11 +657,53 @@ def scenario_2(): recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values) ] + arch_3_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values) + ] + arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_3_recommendation_min = recommendations_arch_3_summary.min() arch_3_recommendation_max = recommendations_arch_3_summary.max() arch_3_recommendation_means = recommendations_arch_3_summary.mean() + total_cost = recommendations_arch_3_summary["total_cost"].sum() + annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: " + f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: " + f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_3_recommendation_min['adjusted_heat_demand']} - " + f"{arch_3_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_3_recommendation_min['energy_percent_change']} - " + f"{arch_3_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: " + f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_3_recommendation_min['carbon_percent_change']} - " + f"{arch_3_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_3_recommendation_min['total_bill_savings']} - " + f"{arch_3_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_3_recommendation_min['bills_percent_change']} - " + f"{arch_3_recommendation_max['bills_percent_change']}") + ######################## # Archetype 4 ######################## @@ -344,11 +712,49 @@ def scenario_2(): recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values) ] + arch_4_property_details = property_details_df[ + property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values) + ] + arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum() + # Take the mean, median and maximum of each value arch_4_recommendation_min = recommendations_arch_4_summary.min() arch_4_recommendation_max = recommendations_arch_4_summary.max() arch_4_recommendation_means = recommendations_arch_4_summary.mean() - property_details_df[ - property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values) - ]["total_floor_area"].mean() + total_cost = recommendations_arch_4_summary["total_cost"].sum() + annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum() + annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum() + annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum() + archetype_measures = \ + recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[ + "id"].count().reset_index() + + cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: " + f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}") + + sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: " + f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}") + + energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: " + f"{arch_4_recommendation_min['adjusted_heat_demand']} - " + f"{arch_4_recommendation_max['adjusted_heat_demand']}") + + energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: " + f"{arch_4_recommendation_min['energy_percent_change']} - " + f"{arch_4_recommendation_max['energy_percent_change']}") + + carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: " + f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}") + + carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: " + f"{arch_4_recommendation_min['carbon_percent_change']} - " + f"{arch_4_recommendation_max['carbon_percent_change']}") + + bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: " + f"{arch_4_recommendation_min['total_bill_savings']} - " + f"{arch_4_recommendation_max['total_bill_savings']}") + + bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: " + f"{arch_4_recommendation_min['bills_percent_change']} - " + f"{arch_4_recommendation_max['bills_percent_change']}") diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 9d2e99e3..2c075820 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -186,9 +186,18 @@ class HeatingRecommender: # This upgrade will only take the heating system to average energy efficiency heating_simulation_config["mainheat_energy_eff_ending"] = "Average" + # If the property is off-gas and has no heating system in place, the number of heated rooms will actually + # be 0, so we use the number of rooms as the figure + number_heated_rooms = ( + self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0 + else ( + self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else + self.property.number_of_rooms + ) + ) # Upgrade to electric storage heaters costs = self.costs.high_heat_electric_storage_heaters( - number_heated_rooms=self.property.data["number-heated-rooms"] + number_heated_rooms=number_heated_rooms ) description = "Install high heat retention electric storage heaters" @@ -268,9 +277,9 @@ class HeatingRecommender: ] and self.property.data["mains-gas-flag"] is_combi = hotwater_from_mains or access_to_mains_no_system if is_combi: - description = "Upgrade to a low carbon combi boiler" + description = "Upgrade to a new combi boiler" else: - description = "Upgrade to a low carbon boiler" + description = "Upgrade to a new boiler" simulation_config = {"mainheat_energy_eff_ending": "Good"} if access_to_mains_no_system: From e182d7acd77aa9dfc56a03650c59ffb3d763aa36 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Apr 2024 10:19:22 +0100 Subject: [PATCH 176/248] change calculation of energy savings to use adjusted heat demand, not heat demand --- backend/app/db/functions/portfolio_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index a8a882bd..ead8280f 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -11,7 +11,7 @@ def aggregate_portfolio_recommendations( session.query( func.sum(Recommendation.estimated_cost).label("cost"), func.sum(Recommendation.total_work_hours).label("total_work_hours"), - func.sum(Recommendation.heat_demand).label("energy_savings"), + func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"), func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"), func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"), ) From 02e72c569513b846cd1348caa17d20a786507c7b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Apr 2024 14:02:48 +0100 Subject: [PATCH 177/248] prevent hot water tank insulation recommendations when no heating system is in place --- recommendations/HotwaterRecommendations.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py index 298671a2..667f5f69 100644 --- a/recommendations/HotwaterRecommendations.py +++ b/recommendations/HotwaterRecommendations.py @@ -22,8 +22,13 @@ class HotwaterRecommendations: # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system - if (self.property.hotwater["heater_type"] in ["electric immersion"]) & \ - (self.property.data["hot-water-energy-eff"] == "Very Poor"): + # If there is not system present, we do not recommend anything, since we will have a separate recommendation + # suggesting system upgrades (e.g. boiler replacement) + if ( + (self.property.hotwater["heater_type"] in ["electric immersion"]) & + (self.property.data["hot-water-energy-eff"] == "Very Poor") & + (self.property.hotwater["no_system_present"] is None) + ): self.recommend_tank_insulation(phase=phase) return From 4134fdbb755f4a25e8162bfb851709372d0c5677 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Apr 2024 15:00:24 +0100 Subject: [PATCH 178/248] Added pruning of solar panel options to prevent systems much too large or much too small --- recommendations/SolarPvRecommendations.py | 46 +++++++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 3a89b213..744351be 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -8,6 +8,9 @@ class SolarPvRecommendations: # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w SOLAR_PANEL_WATTAGE = 250 + MAX_SYSTEM_WATTAGE = 4200 + MIN_SYSTEM_WATTAGE = 2500 + def __init__(self, property_instance): """ :param property_instance: Instance of the Property class, for the home associated to property_id @@ -18,6 +21,19 @@ class SolarPvRecommendations: self.recommendation = [] + @staticmethod + def trim_solar_wattage_options(scenarios_with_wattage): + # Initialize the list with the first element, assuming the list is not empty + trimmed_list = [scenarios_with_wattage[0]] + + # Iterate over the list starting from the second element + for scenario in scenarios_with_wattage[1:]: + # Compare the second element (index 1) of the current tuple with the last tuple in the trimmed list + if scenario[1] > trimmed_list[-1][1]: + trimmed_list.append(scenario) + + return trimmed_list + def recommend(self, phase): """ We check if a property is potentially suitable for solar PV based on the following criteria: @@ -46,26 +62,40 @@ class SolarPvRecommendations: self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage, self.property.solar_pv_percentage + 0.1 ] - # We make sure we haven't gone too low or high - roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 1] + # We make sure we haven't gone too low or high - we allow no more than 60% coverage + roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6] + # If we only have two scenarios, we add a coverage scenario 10% less than the smallest + if len(roof_coverage_scenarios) == 2: + roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1) battery_scenarios = [False, True] - # I now produce the cross product of the scenarios - scenarios = [(roof, battery) for roof in roof_coverage_scenarios for battery in battery_scenarios] - - for roof_coverage, has_battery in scenarios: + scenarios_with_wattage = [] + for roof_coverage in roof_coverage_scenarios: # We now have a property which is potentially suitable for solar PV solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage) number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA) solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE + solar_panel_wattage = np.clip( + a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE + ) + scenarios_with_wattage.append((roof_coverage, solar_panel_wattage)) + # We trim the scenarios, so that we don't have duplicate wattages + scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage) + + # Produce the cross product of the scenarios + scenarios = [ + (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios + ] + # We deduce the wattage of the solar panels based on the roof coverage + + for roof_coverage, solar_panel_wattage, has_battery in scenarios: + # We now have a property which is potentially suitable for solar PV roof_coverage_percent = round(roof_coverage * 100) - # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database # of solar PV installations cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery) - kw = np.floor(solar_panel_wattage / 100) / 10 if has_battery: From ec6fc84911d1a8ac3689c9f07b866fda98086212 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Apr 2024 15:14:55 +0100 Subject: [PATCH 179/248] updating solar panel logic --- recommendations/SolarPvRecommendations.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 744351be..4cf1c1fc 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -8,8 +8,8 @@ class SolarPvRecommendations: # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w SOLAR_PANEL_WATTAGE = 250 - MAX_SYSTEM_WATTAGE = 4200 - MIN_SYSTEM_WATTAGE = 2500 + MAX_SYSTEM_WATTAGE = 6000 + MIN_SYSTEM_WATTAGE = 1000 def __init__(self, property_instance): """ @@ -60,8 +60,9 @@ class SolarPvRecommendations: # 2) With and without battery roof_coverage_scenarios = [ self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage, - self.property.solar_pv_percentage + 0.1 ] + if self.property.solar_pv_percentage <= 0.4: + roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1) # We make sure we haven't gone too low or high - we allow no more than 60% coverage roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6] # If we only have two scenarios, we add a coverage scenario 10% less than the smallest @@ -76,6 +77,10 @@ class SolarPvRecommendations: number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA) solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE + + if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE: + continue + solar_panel_wattage = np.clip( a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE ) From 6258c347d68ecd1156387f9e2a532d099e2be2c3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Apr 2024 16:06:30 +0100 Subject: [PATCH 180/248] updating boiler recommendation to impact mains fuel and consider the impact on the main fuel --- etl/customers/gla_croydon_demo/asset_list.py | 4 ++ recommendations/HeatingRecommender.py | 58 +++++++++++++++----- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 52e9422c..777cba83 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -34,6 +34,10 @@ def app(): low_memory=False ) + z = epc_data.groupby(["MAINHEAT_DESCRIPTION", "MAINHEATCONT_DESCRIPTION", "MAIN_FUEL"]).size().reset_index( + name="count") + z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] + # Filter on entries where we have a UPRN epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 2c075820..f602ecab 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -5,6 +5,7 @@ from recommendations.recommendation_utils import check_simulation_difference from backend.Property import Property from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes +from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes from recommendations.HeatingControlRecommender import HeatingControlRecommender @@ -44,7 +45,7 @@ class HeatingRecommender: ] and self.property.data["mains-gas-flag"] if has_boiler or no_heating_has_mains: - self.recommend_boiler_upgrades(phase=phase) + self.recommend_boiler_upgrades(phase=phase, no_heating_has_mains=no_heating_has_mains) return @staticmethod @@ -250,17 +251,20 @@ class HeatingRecommender: return closest_size - def recommend_boiler_upgrades(self, phase): + def recommend_boiler_upgrades(self, phase, no_heating_has_mains): """ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system is generally more expensive :param phase: + :param no_heating_has_mains: indicaes if the property has no heating system, but has access to the mains gas :return: """ recommendation_phase = phase # We now recommend boiler upgrades, if applicable + simulation_config = {} + boiler_costs = {} if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]: boiler_size = self.estimate_boiler_size( property_type=self.property.data["property-type"], @@ -272,21 +276,20 @@ class HeatingRecommender: # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"] - access_to_mains_no_system = self.property.main_heating["clean_description"] in [ - 'No system present, electric heaters assumed' - ] and self.property.data["mains-gas-flag"] - is_combi = hotwater_from_mains or access_to_mains_no_system + + is_combi = hotwater_from_mains or no_heating_has_mains if is_combi: description = "Upgrade to a new combi boiler" else: description = "Upgrade to a new boiler" simulation_config = {"mainheat_energy_eff_ending": "Good"} - if access_to_mains_no_system: + if no_heating_has_mains: # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process() hotwater_ending_config = HotWaterAttributes("From main system").process() + fuel_ending_config = MainFuelAttributes("mains gas (not community)").process() heating_simulation_config = check_simulation_difference( new_config=heating_ending_config, old_config=self.property.main_heating @@ -294,14 +297,20 @@ class HeatingRecommender: hotwater_simulation_config = check_simulation_difference( new_config=hotwater_ending_config, old_config=self.property.hotwater ) + fuel_simulation_config = check_simulation_difference( + new_config=fuel_ending_config, old_config=self.property.main_fuel + ) simulation_config = { **simulation_config, **heating_simulation_config, **hotwater_simulation_config, + **fuel_simulation_config, "hot_water_energy_eff_ending": "Good" } + boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") + self.recommendations.append( { "phase": recommendation_phase, @@ -314,22 +323,45 @@ class HeatingRecommender: "new_u_value": None, "sap_points": None, "simulation_config": simulation_config, - **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") + **boiler_costs } ) - # We increment the recommendation phase, in the case of us having heating control recommendations - recommendation_phase += 1 - # We recommend the heating controls + # If the property did not previously have a boiler, we combine controls_recommender = HeatingControlRecommender(self.property) controls_recommender.recommend(heating_description="Boiler and radiators, mains gas") # We may have 2 recommendations from the heating controls - if controls_recommender.recommendation: + if not controls_recommender.recommendation: + return + + if no_heating_has_mains: + # We combine the heating and controls recommendations + boiler_recommendation = self.recommendations[0].copy() + combined_recommendations = [] + for controls_recommendation in controls_recommender.recommendation: + combined_recommendation = self.combine_heating_and_controls( + controls_recommendations=[controls_recommendation], + heating_simulation_config=simulation_config, + costs=boiler_costs, + description=boiler_recommendation["description"], + phase=recommendation_phase, + heating_controls_only=False, + system_change=True + ) + combined_recommendations.extend(combined_recommendation) + + # Overwrite the existing boiler recommendation + self.recommendations = combined_recommendations + else: + # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade + recommendation_phase += 1 # The heating controls recommendation is distrinct from the boiler upgrade recommendation # We insert phase into the recommendations for heating controls for recommendation in controls_recommender.recommendation: recommendation["phase"] = recommendation_phase - self.recommendations.extend(controls_recommender.recommendation) + self.recommendations.extend(controls_recommender.recommendation) + + return From 35a288fd7406c630fddde596360fa35e53d3fdd4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Apr 2024 16:47:15 +0100 Subject: [PATCH 181/248] Updating recommendations --- backend/Property.py | 5 ----- etl/customers/gla_croydon_demo/asset_list.py | 3 +-- recommendations/HeatingRecommender.py | 7 ++++++- recommendations/HotwaterRecommendations.py | 5 +++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 19f15b02..d3dd8395 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -298,11 +298,6 @@ class Property: if recommendation["type"] == "cavity_wall_insulation": output["is_filled_cavity_ending"] = True - # TODO: perhaps detrimental - # When making a recommendation for the wall, we will also update the ventilation - # if output["mechanical_ventilation_ending"] == 'natural': - # output["mechanical_ventilation_ending"] = 'mechanical, extract only' - else: if output["walls_thermal_transmittance_ending"] is None: raise ValueError("We should not have a None value for the u value") diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 777cba83..7dde8926 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -34,8 +34,7 @@ def app(): low_memory=False ) - z = epc_data.groupby(["MAINHEAT_DESCRIPTION", "MAINHEATCONT_DESCRIPTION", "MAIN_FUEL"]).size().reset_index( - name="count") + z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count") z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] # Filter on entries where we have a UPRN diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index f602ecab..aec1f419 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -104,8 +104,13 @@ class HeatingRecommender: **recommendation_simulation_config, **controls_recommendations[0]["simulation_config"] } + controls_description = controls_recommendations[0]['description'] + # Make the first letter of the description lowercase + controls_description = ( + controls_description[0].lower() + controls_description[1:] + ) - recommendation_description = f"{description} and {controls_recommendations[0]['description']}" + recommendation_description = f"{description} and {controls_description}" recommendation = { "phase": phase, diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py index 667f5f69..7f77597f 100644 --- a/recommendations/HotwaterRecommendations.py +++ b/recommendations/HotwaterRecommendations.py @@ -22,8 +22,9 @@ class HotwaterRecommendations: # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system - # If there is not system present, we do not recommend anything, since we will have a separate recommendation - # suggesting system upgrades (e.g. boiler replacement) + + # If there is no system present, but access to the mains, we + if ( (self.property.hotwater["heater_type"] in ["electric immersion"]) & (self.property.data["hot-water-energy-eff"] == "Very Poor") & From 0142e6fe5fcbcffc836bc139df48cf31e77545f1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 8 Apr 2024 15:29:52 +0100 Subject: [PATCH 182/248] wip matching completed surveys back to the asset list --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../ha_15_32/ha_analysis_batch_3.py | 78 +++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index b4b82d0b..de2c0e6a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -6907,3 +6907,81 @@ def app(): december_figures["ECO4 remaining"] ) december_figures["ECO4 remaining"].sum() + + # Adhoc - for UNITAS, stripping out additional surveys that have been completed + unitas_data = loader.data["HA50"].copy() + unitas_asset_list = unitas_data["asset_list"].copy() + unitas_survey_sheet = unitas_data["survey_list"].copy() + # We remove the surveyed properties from the asset sheet + unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])] + unitas_asset_list = unitas_asset_list.merge( + unitas_survey_sheet[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])] + unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"]) + + # We read in the data for the further completed surveys + unitas_phase_1_workbook = openpyxl.load_workbook( + "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx" + ) + phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"] + phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"] + phase1_colnames = [cell.value for cell in phase_1_worksheet[1]] + phase_1_rows_data = [] + for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + phase_1_rows_data.append(row_data) + + phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames) + + # Correct phase 1 surveys in the same fashion as the previous approach + phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy()) + + # We check all phase 1 surveys are contained in the data we had before + additional = [] + for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)): + # We look for the entry in the old survey sheet: + # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]] + # if matched_uprn.shape[0] == 1: + # continue + + matched_1 = unitas_survey_sheet[ + (unitas_survey_sheet["Post Code"] == row["Post Code"]) & + (unitas_survey_sheet["NO."] == row["NO."]) + ] + + if matched_1.shape[0] == 1: + continue + + matched_2 = unitas_survey_sheet[ + (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) & + (unitas_survey_sheet["NO."] == row["NO."]) + ] + + if matched_2.shape[0] == 1: + continue + + additional.append(row.to_dict()) + additional = pd.DataFrame(additional) + + phase_2_rows_data = [] + for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + phase_2_rows_data.append(row_data) + + phase2_colnames = [cell.value for cell in phase_2_worksheet[1]] + phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames) + # Drop all of the occurances of "OFFICE USE ONLY" columns + phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c]) + common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns}) + additional_filtered = additional[common_columns] + + further_unitas_completed_surveys = pd.concat( + [phase_2_surveys, additional_filtered], + axis=0, + ignore_index=True + ) + + # We match these back to the asset list From dc80313eca2119703e161c6a6ad1c9380f1cc886 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 9 Apr 2024 14:57:55 +0100 Subject: [PATCH 183/248] merging EPC data and survey outcomes to asset list --- .../ha_15_32/ha_analysis_batch_3.py | 413 ++++++++++++++---- 1 file changed, 334 insertions(+), 79 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index de2c0e6a..35bb63fe 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3459,7 +3459,7 @@ class DataLoader: "not eligible", asset_list["ECO Eligibility"] ) - asset_list = asset_list.drop(columns=["has_eco3"]) + # asset_list = asset_list.drop(columns=["has_eco3"]) # Report on sales sales_report = {} @@ -6778,6 +6778,339 @@ def identify_eco_works(loader): breakdowns = breakdowns.fillna(0) +def unitas_data_prep(loader): + ##### + # Adhoc - for UNITAS, stripping out additional surveys that have been completed + unitas_data = loader.data["HA50"].copy() + unitas_asset_list = unitas_data["asset_list"].copy() + unitas_survey_sheet = unitas_data["survey_list"].copy() + + # We remove the surveyed properties from the asset sheet + unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])] + unitas_asset_list = unitas_asset_list.merge( + unitas_survey_sheet[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])] + unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"]) + + # We read in the data for the further completed surveys + unitas_phase_1_workbook = openpyxl.load_workbook( + "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx" + ) + phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"] + phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"] + phase1_colnames = [cell.value for cell in phase_1_worksheet[1]] + phase_1_rows_data = [] + for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + phase_1_rows_data.append(row_data) + + phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames) + + # Correct phase 1 surveys in the same fashion as the previous approach + phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy()) + + # We check all phase 1 surveys are contained in the data we had before + additional = [] + for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)): + # We look for the entry in the old survey sheet: + # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]] + # if matched_uprn.shape[0] == 1: + # continue + + matched_1 = unitas_survey_sheet[ + (unitas_survey_sheet["Post Code"] == row["Post Code"]) & + (unitas_survey_sheet["NO."] == row["NO."]) + ] + + if matched_1.shape[0] == 1: + continue + + matched_2 = unitas_survey_sheet[ + (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) & + (unitas_survey_sheet["NO."] == row["NO."]) + ] + + if matched_2.shape[0] == 1: + continue + + additional.append(row.to_dict()) + additional = pd.DataFrame(additional) + + phase_2_rows_data = [] + for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + phase_2_rows_data.append(row_data) + + phase2_colnames = [cell.value for cell in phase_2_worksheet[1]] + phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames) + # Drop all of the occurances of "OFFICE USE ONLY" columns + phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c]) + common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns}) + additional_filtered = additional[common_columns] + + further_unitas_completed_surveys = pd.concat( + [phase_2_surveys, additional_filtered], + axis=0, + ignore_index=True + ) + + # Add a phase 2 key + further_unitas_completed_surveys["survey_list_row_id"] = [ + "unitas_phase_2" + str(i) for i in further_unitas_completed_surveys.index + ] + + not_in_asset_list = [ + "unitas_phase_20", "unitas_phase_234", "unitas_phase_2163", "unitas_phase_2173", "unitas_phase_2374" + ] + + additional_postcodes = ["st28bg"] + + full_asset_list = unitas_data["asset_list"].copy() + full_asset_list["matching_postcode"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "") + further_unitas_completed_surveys["Post Code"] = further_unitas_completed_surveys["Post Code"].str.replace( + "ST 5DT", "ST3 5DT" + ) + + # We match these back to the asset list + matching_lookup = [] + for _, row in tqdm(further_unitas_completed_surveys.iterrows(), total=len(further_unitas_completed_surveys)): + + if row["survey_list_row_id"] in not_in_asset_list: + continue + + postcode_lower = row["Post Code"].lower().strip().replace(" ", "") + if postcode_lower in additional_postcodes: + continue + + # Confirmed not in asset lsit + # Filter asset list on postcode + df = full_asset_list[ + full_asset_list["matching_postcode"].str.contains(postcode_lower) + ] + + df = df[df["HouseNo"] == str(row["NO."])] + + if df.shape[0] != 1: + raise Exception("NOT FOUND") + + matching_lookup.append( + { + "survey_list_row_id": row["survey_list_row_id"], + "asset_list_row_id": df["asset_list_row_id"].values[0], + } + ) + + matching_lookup = pd.DataFrame(matching_lookup) + matching_lookup["phase_2_surveyed"] = True + + # We merge this onto the asset list and remove the rows + unitas_asset_list = unitas_asset_list.merge( + matching_lookup, how="left", on="asset_list_row_id" + ) + # Drop rows where phase_2_surveyed is populated + unitas_asset_list = unitas_asset_list[ + pd.isnull(unitas_asset_list["phase_2_surveyed"]) + ] + + # We add in the new CIGA submissions + unitas_round_2_ciga_workbook = openpyxl.load_workbook("local_data/ha_data/Unitas second round CIGA checks.xlsx") + ciga_round_2_worksheet = unitas_round_2_ciga_workbook["Worksheet"] + ciga_round_2_colnames = [cell.value for cell in ciga_round_2_worksheet[1]] + round_2_rows_data = [] + for row in ciga_round_2_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + round_2_rows_data.append(row_data) + + ciga_round_2 = pd.DataFrame(round_2_rows_data, columns=ciga_round_2_colnames) + # We merge the ciga sheet to the asset list + ciga_dependent_asset_list = unitas_asset_list[ + unitas_asset_list["ECO Eligibility"].str.contains("subject to ciga") + ].copy() + + # We merge the ciga sheet to the asset list + ciga_round_2_matched = ciga_dependent_asset_list.merge( + ciga_round_2, how="inner", on=["Address Line 1", "Post Code"] + ) + # Filter on just the properties that had no guarantee + ciga_round_2_matched = ciga_round_2_matched[ciga_round_2_matched["Guarantee"] == "No"] + + # ECO Eligibility + # not eligible 9227 + # failed ciga 2711 + # eco4 (subject to ciga) 2238 + # eco4 - passed ciga 901 + # gbis 114 + # eco4 91 + + # We filter on the properties we're looking to re-survey + unitas_properties_to_survey = unitas_asset_list[ + unitas_asset_list["ECO Eligibility"].isin( + [ + "eco4 - passed ciga", + "eco4" + ] + ) + ].copy() + + unitas_properties_to_survey = pd.concat( + [ + unitas_properties_to_survey, + ciga_round_2_matched[unitas_properties_to_survey.columns] + ] + ) + + epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" + + # We now retrieve the lastest EPC data + epc_data = [] + for _, unitas_property in tqdm(unitas_properties_to_survey.iterrows(), total=len(unitas_properties_to_survey)): + property_type, _ = get_property_type_and_built_form(property_meta=unitas_property, ha_name="HA50") + + full_address = unitas_property["matching_address"] + + searcher = SearchEpc( + address1=str(unitas_property["HouseNo"]), + postcode=unitas_property["matching_postcode"], + auth_token=epc_api_key, + os_api_key="", + property_type=property_type, + full_address=full_address, + fast=True + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_row_id": unitas_property["asset_list_row_id"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + # Pull out just the columns we need + epc_df = epc_df[ + [ + "asset_list_row_id", + "address1", "postcode", + "current-energy-efficiency", + "current-energy-rating", + "inspection-date", + "transaction-type", + "built-form" + ] + ] + + epc_df["EPC Rating"] = ( + epc_df["current-energy-efficiency"].astype(str) + + epc_df["current-energy-rating"].astype(str) + ) + + # Merge onto the Unitas data: + unitas_properties_to_survey_full = unitas_properties_to_survey.merge( + epc_df[ + [ + "asset_list_row_id", + "EPC Rating", + "inspection-date", + "transaction-type", + "built-form" + ] + ], + how="left", + on="asset_list_row_id" + ) + + unitas_properties_to_survey_full["ECO Eligibility"] = unitas_properties_to_survey_full["ECO Eligibility"].replace( + "eco4 (subject to ciga)", "eco4 - passed ciga, phase 2 check" + ) + + for col in ["EPC Rating", "inspection-date", "transaction-type", "built-form"]: + unitas_properties_to_survey_full[col] = np.where( + pd.isnull(unitas_properties_to_survey_full[col]), + "No EPC found", + unitas_properties_to_survey_full[col] + ) + unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].fillna( + "No EPC found" + ) + unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].astype(str) + + unitas_properties_to_survey_full = unitas_properties_to_survey_full.rename( + columns={ + "inspection-date": "Last EPC Inspection Date", + "transaction-type": "Last EPC Reason", + "built-form": "Last EPC Built Form", + } + ) + + # We now match to the survey outcomes + unitas_survey_outcomes_workbook = openpyxl.load_workbook( + "local_data/ha_data/UNITAS - survey outcomes 26.03.2024.xlsx" + ) + unitas_survey_outcomes_worksheet = unitas_survey_outcomes_workbook["OUTCOMES"] + unitas_outcomes_colnames = [cell.value for cell in unitas_survey_outcomes_worksheet[2]] + outcomes_rows_data = [] + for row in unitas_survey_outcomes_worksheet.iter_rows(min_row=3, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + outcomes_rows_data.append(row_data) + + unitas_outcomes = pd.DataFrame(outcomes_rows_data, columns=unitas_outcomes_colnames) + unitas_outcomes = unitas_outcomes.rename( + columns={ + "Notes (If 'no answer' under outcomes, have you checked around the property for access " + "issues where possible?)": "Notes" + } + ) + + unitas_outcomes["Postcode"].unique() + eg1 = unitas_properties_to_survey_full[ + (unitas_properties_to_survey_full["Post Code"] == "ST6 6RF") + ] + eg1_outcomes = unitas_outcomes[ + (unitas_outcomes["Postcode"] == "ST6 6RF") + ] + + # Merge outcomes onto properties to survey. Will probably have to do algorithmically + full_asset_list["matching_postcode_nospace"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "") + outcome_matching = [] + for _, outcome in tqdm(unitas_outcomes.iterrows(), total=len(unitas_outcomes)): + # We search for the corresponding entry in the asset list + postcode_lower = outcome["Postcode"].lower().strip().replace(" ", "") + + # Confirmed not in asset lsit + # Filter asset list on postcode + df = unitas_properties_to_survey_full[ + unitas_properties_to_survey_full["matching_postcode_nospace"].str.contains(postcode_lower) + ] + + df = df[df["HouseNo"] == str(outcome["No."])] + if df.empty: + continue + + if df.shape[0] == 1: + outcome_matching.append( + { + "asset_list_row_id": df["asset_list_row_id"].values[0], + **outcome.to_dict() + } + ) + continue + + raise Exception("something went wrong") + + # Store as an excel + unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx") + + def app(): """ This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. @@ -6907,81 +7240,3 @@ def app(): december_figures["ECO4 remaining"] ) december_figures["ECO4 remaining"].sum() - - # Adhoc - for UNITAS, stripping out additional surveys that have been completed - unitas_data = loader.data["HA50"].copy() - unitas_asset_list = unitas_data["asset_list"].copy() - unitas_survey_sheet = unitas_data["survey_list"].copy() - # We remove the surveyed properties from the asset sheet - unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])] - unitas_asset_list = unitas_asset_list.merge( - unitas_survey_sheet[["asset_list_row_id", "installation_status"]], - how="left", - on="asset_list_row_id" - ) - unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])] - unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"]) - - # We read in the data for the further completed surveys - unitas_phase_1_workbook = openpyxl.load_workbook( - "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx" - ) - phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"] - phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"] - phase1_colnames = [cell.value for cell in phase_1_worksheet[1]] - phase_1_rows_data = [] - for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False): - row_data = [cell.value for cell in row] # This will get you the cell values - phase_1_rows_data.append(row_data) - - phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames) - - # Correct phase 1 surveys in the same fashion as the previous approach - phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy()) - - # We check all phase 1 surveys are contained in the data we had before - additional = [] - for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)): - # We look for the entry in the old survey sheet: - # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]] - # if matched_uprn.shape[0] == 1: - # continue - - matched_1 = unitas_survey_sheet[ - (unitas_survey_sheet["Post Code"] == row["Post Code"]) & - (unitas_survey_sheet["NO."] == row["NO."]) - ] - - if matched_1.shape[0] == 1: - continue - - matched_2 = unitas_survey_sheet[ - (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) & - (unitas_survey_sheet["NO."] == row["NO."]) - ] - - if matched_2.shape[0] == 1: - continue - - additional.append(row.to_dict()) - additional = pd.DataFrame(additional) - - phase_2_rows_data = [] - for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False): - row_data = [cell.value for cell in row] # This will get you the cell values - phase_2_rows_data.append(row_data) - - phase2_colnames = [cell.value for cell in phase_2_worksheet[1]] - phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames) - # Drop all of the occurances of "OFFICE USE ONLY" columns - phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c]) - common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns}) - additional_filtered = additional[common_columns] - - further_unitas_completed_surveys = pd.concat( - [phase_2_surveys, additional_filtered], - axis=0, - ignore_index=True - ) - - # We match these back to the asset list From f0c4ca0143ee886ba84960b00e3f2700b6047429 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 11:14:33 +0100 Subject: [PATCH 184/248] completed unitas --- .../ha_15_32/ha_analysis_batch_3.py | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 35bb63fe..f99c7b1a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -7106,9 +7106,53 @@ def unitas_data_prep(loader): continue raise Exception("something went wrong") + outcome_matching = pd.DataFrame(outcome_matching) + + # We can have duplicate matches, so we format the Date letter sent column and retrieve the newest outcome + outcome_matching["Date letters sent"] = outcome_matching["Date letters sent"].str.lower() + outcome_matching["Extracted Date"] = outcome_matching["Date letters sent"].str.extract( + r'(?:w[./]c )(\d{2}\.\d{2}\.\d{4})') + outcome_matching["Extracted Date"] = pd.to_datetime(outcome_matching["Extracted Date"], format='%d.%m.%Y') + # We sort by asset_list_row_id and extracted date, and retrieve the newest + outcome_matching = outcome_matching.sort_values(["asset_list_row_id", "Extracted Date"], ascending=[True, False]) + + # Some properties will have multiple outcomes - for these, we re-format + outcome_matching_grouped = [] + for asset_list_row_id, grouped_data in outcome_matching.groupby("asset_list_row_id"): + if grouped_data.shape[0] == 1: + outcome_matching_grouped.append( + { + "Number of previous visits": 1, + **grouped_data.to_dict("records")[0] + } + ) + continue + if grouped_data.shape[0] == 2: + newest_visit = grouped_data.head(1) + oldest_visit = grouped_data.tail(1)[['Outcomes', 'Surveyor', 'Notes', 'Date letters sent']].add_suffix( + " second visit") + to_append = { + "Number of previous visits": 2, + **newest_visit.to_dict("records")[0], + **oldest_visit.to_dict("records")[0] + } + outcome_matching_grouped.append(to_append) + else: + raise Exception("something went wrong") + + outcome_matching_grouped = pd.DataFrame(outcome_matching_grouped) + + unitas_properties_to_survey_with_outcomes = unitas_properties_to_survey_full.merge( + outcome_matching_grouped, how="left", on="asset_list_row_id" + ) + unitas_properties_to_survey_with_outcomes["Number of previous visits"] = ( + unitas_properties_to_survey_with_outcomes["Number of previous visits"].fillna(0) + ) # Store as an excel - unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx") + unitas_properties_to_survey_with_outcomes.to_excel("Unitas - phase 2 properties to Survey.xlsx") + + unitas_properties_to_survey_with_outcomes["Last EPC Built Form"].value_counts() def app(): From cf7627a8d7fa06df445faf7637e06eefd7f8764b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 12:04:17 +0100 Subject: [PATCH 185/248] started setting up asset list and gathering council tax bands --- etl/customers/immo/pilot/asset_list.py | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 etl/customers/immo/pilot/asset_list.py diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py new file mode 100644 index 00000000..33f79729 --- /dev/null +++ b/etl/customers/immo/pilot/asset_list.py @@ -0,0 +1,44 @@ +import os + +import pandas as pd +from tqdm import tqdm + +from dotenv import load_dotenv +from utils.s3 import read_excel_from_s3 +from backend.SearchEpc import SearchEpc +from epc_api.client import EpcClient +from utils.s3 import save_csv_to_s3 + +# Read in the .env file in backend +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +USER_ID = 8 +PORTFOLIO_ID = 70 + +council_tax_bands = [ + {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'band': 'A'}, + {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'band': 'A'}, + {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'band': 'A'}, + {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'band': 'A'}, + {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'band': 'A'}, + {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'band': 'B'}, + {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'band': 'B'}, + {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'band': 'C'}, + {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'}, + {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'}, +] + + +def app(): + raw_asset_list = read_excel_from_s3( + bucket_name="retrofit-datalake-dev", + file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx", + header_row=0 + ) + raw_asset_list = raw_asset_list.drop(columns=["Unnamed: 0"]) + # Extract address and postcode + raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0] + raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() + + raw_asset_list[["address", "postcode"]].to_dict("records") From b791ecb054f0e5be39f91f78771f74ed80fe904d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 12:08:47 +0100 Subject: [PATCH 186/248] set up asset list --- etl/customers/immo/pilot/asset_list.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 33f79729..269ffe00 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -41,4 +41,23 @@ def app(): raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0] raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() - raw_asset_list[["address", "postcode"]].to_dict("records") + council_tax_bands = pd.DataFrame(council_tax_bands) + asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"]) + + # Store the data in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": filename, + "budget": None, + } + print(body) From 5079170a25066e4ed3ab96c7a5034f1ddce5ada2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 13:34:15 +0100 Subject: [PATCH 187/248] pulled valuations for immo pilot from Zoopla --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/app/plan/router.py | 10 ++++++++++ backend/ml_models/Valuation.py | 11 +++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 50b8a837..c71533fa 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -142,6 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest): ) ) + z = [] + for p in input_properties: + z.append( + { + "uprn": p.uprn, + "address": p.address, + "postcode": p.postcode, + } + ) + if not input_properties: return Response(status_code=204) diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 2bb7de32..251c016a 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -52,6 +52,17 @@ class PropertyValuation: 10070056829: 76_000, 10070056920: 76_000, 10023345463: 76_000, + # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ + 90070461: 172_000, # Based on Zoopla + 90022227: 181_000, # Based on Zoopla + 90106884: 180_000, # Based on Zoopla + 90051858: 201_000, # Based on Zoopla + 90060989: 172_000, # Based on Zoopla + 90048026: 196_000, # Based on Zoopla + 90077535: 192_000, # Based on Zoopla + 90093693: 279_000, # Based on Zoopla + 90055152: 149_000, # Based on Zoopla + 90028499: 238_000, # Based on Zoopla } # We base our valuation uplifts on a number of sources From 5ac5cd7737a5b632258d130ea0e36057c25b0b6a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 14:02:19 +0100 Subject: [PATCH 188/248] fixing bug when setting phase for heating controls, without a recommendation --- backend/app/plan/router.py | 10 ---------- recommendations/HeatingRecommender.py | 7 ++++++- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index c71533fa..50b8a837 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -142,16 +142,6 @@ async def trigger_plan(body: PlanTriggerRequest): ) ) - z = [] - for p in input_properties: - z.append( - { - "uprn": p.uprn, - "address": p.address, - "postcode": p.postcode, - } - ) - if not input_properties: return Response(status_code=204) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index aec1f419..91730053 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -361,7 +361,12 @@ class HeatingRecommender: self.recommendations = combined_recommendations else: # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade - recommendation_phase += 1 + # but we'll only upgrade if we have a heating recommendation + has_heating_recommendation = any( + recommendation["type"] == "heating" for recommendation in self.recommendations + ) + if has_heating_recommendation: + recommendation_phase += 1 # The heating controls recommendation is distrinct from the boiler upgrade recommendation # We insert phase into the recommendations for heating controls for recommendation in controls_recommender.recommendation: From 4e4199345511c2aa8e838581cebe9e7c307c1475 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 17:20:49 +0100 Subject: [PATCH 189/248] savings --- etl/customers/immo/pilot/asset_list.py | 13 +------------ recommendations/optimiser/optimiser_functions.py | 6 +----- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 269ffe00..7939a555 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -1,18 +1,7 @@ -import os - import pandas as pd -from tqdm import tqdm - -from dotenv import load_dotenv from utils.s3 import read_excel_from_s3 -from backend.SearchEpc import SearchEpc -from epc_api.client import EpcClient from utils.s3 import save_csv_to_s3 -# Read in the .env file in backend -load_dotenv(dotenv_path="backend/.env") -EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") - USER_ID = 8 PORTFOLIO_ID = 70 @@ -28,6 +17,7 @@ council_tax_bands = [ {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'}, {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'}, ] +council_tax_bands = pd.DataFrame(council_tax_bands) def app(): @@ -41,7 +31,6 @@ def app(): raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0] raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() - council_tax_bands = pd.DataFrame(council_tax_bands) asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"]) # Store the data in s3 diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 27838d6e..9860c5ea 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -20,10 +20,6 @@ def prepare_input_measures(property_recommendations, goal, housing_type): if not goal_key: raise NotImplementedError("Not implemented this gain type - investigate me") - # We don't include suspended and solid floor insulation as possible measures in private housing, because - # of the need to decant the tenant - ignored_measures = ["suspended_floor_insulation", "solid_floor_insulation"] if housing_type == "Private" else [] - input_measures = [] for recs in property_recommendations: input_measures.append( @@ -34,7 +30,7 @@ def prepare_input_measures(property_recommendations, goal, housing_type): "gain": rec[goal_key], "type": rec["type"] } - for rec in recs if rec["type"] not in ignored_measures + for rec in recs ] ) From 346b798c192e4c071640123379c021373d965543 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 17:26:00 +0100 Subject: [PATCH 190/248] removed whitespace --- backend/app/plan/router.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 50b8a837..bbf9261b 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -75,7 +75,6 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Connecting to db") session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() - # TODO: We should store the trigger file path in the database with the plan so we can track the file that # triggered the plan From e0e60f8c9822aec63e1acb74bdb037a8a4840210 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 17:26:23 +0100 Subject: [PATCH 191/248] added whitespace --- backend/app/plan/router.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index bbf9261b..4b4d45e7 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -75,6 +75,7 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Connecting to db") session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() + # TODO: We should store the trigger file path in the database with the plan so we can track the file that # triggered the plan From 505fe0736becf7ad649d24ff68bf902825239b02 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 18:46:51 +0100 Subject: [PATCH 192/248] Updating optimiser to only optimise solar recommendations that include the battery --- backend/app/plan/router.py | 7 ++----- recommendations/SolarPvRecommendations.py | 3 ++- recommendations/optimiser/optimiser_functions.py | 12 +++++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4b4d45e7..6f179c79 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -75,7 +75,7 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Connecting to db") session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() - + # TODO: We should store the trigger file path in the database with the plan so we can track the file that # triggered the plan @@ -242,7 +242,7 @@ async def trigger_plan(body: PlanTriggerRequest): expected_adjusted_energy=expected_adjusted_energy ) - input_measures = prepare_input_measures(recommendations_with_impact, body.goal, body.housing_type) + input_measures = prepare_input_measures(recommendations_with_impact, body.goal) current_sap_points = int(property_instance.data["current-energy-efficiency"]) target_sap_points = epc_to_sap_lower_bound(body.goal_value) @@ -279,9 +279,6 @@ async def trigger_plan(body: PlanTriggerRequest): if ventilation_rec: selected_recommendations.add(ventilation_rec["recommendation_id"]) - # We check if the selected recommendation is wall ventilation and if so, we make sure - # mechanical ventilation is selected - # We'll use the set of selected recommendations to filter the recommendations to upload final_recommendations = [ [ diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 4cf1c1fc..f75003ce 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -122,6 +122,7 @@ class SolarPvRecommendations: **cost_result, # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale # back up here - "photo_supply": 100 * roof_coverage + "photo_supply": 100 * roof_coverage, + "has_battery": has_battery } ) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 9860c5ea..6159b930 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -1,17 +1,13 @@ -def prepare_input_measures(property_recommendations, goal, housing_type): +def prepare_input_measures(property_recommendations, goal): """ Basic function to convert recommendations_to_upload to a format that is suitable for the optimiser - large :param property_recommendations: object containing the recommendations, created in the plan trigger api :param goal: goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points, the goal should reflect that desired gain - :param housing_type: type of housing the recommendations are for - should be one of "Social" or "Private" :return: Nested list of input measures """ - if housing_type not in ["Social", "Private"]: - raise ValueError("Invalid housing type - investigate me") - goal_map = { "Increase EPC": "sap_points" } @@ -22,6 +18,12 @@ def prepare_input_measures(property_recommendations, goal, housing_type): input_measures = [] for recs in property_recommendations: + if recs[0]["type"] == "solar_pv": + # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation. + # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't + # under-report the potential cost + recs = [r for r in recs if recs["has_battery"]] + input_measures.append( [ { From f04b79d6800fce396fdbc5494b66f221d43a9826 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 18:54:58 +0100 Subject: [PATCH 193/248] fixed bug with selecting batter solar recommendations --- recommendations/optimiser/optimiser_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 6159b930..d6353eea 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -22,7 +22,7 @@ def prepare_input_measures(property_recommendations, goal): # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation. # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't # under-report the potential cost - recs = [r for r in recs if recs["has_battery"]] + recs = [r for r in recs if r["has_battery"]] input_measures.append( [ From 43af0de04732ba737459a1f04ccb50950287c235 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 10:30:36 +0100 Subject: [PATCH 194/248] Updated condittions we recommend loft insulation, so it is not recommended if the home has more than 200mm insulation in place already --- recommendations/RoofRecommendations.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index eb1c6c4f..8d6a91e7 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -20,8 +20,9 @@ class RoofRecommendations: DIMINISHING_RETURNS_U_VALUE = 0.14 - # It is recommended that lofts should have at least 270mm of insulation - MINIMUM_LOFT_ISULATION_MM = 270 + # It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of + # loft insulation in place already, we do not recommend anything for the moment + MINIMUM_LOFT_ISULATION_MM = 200 # Flat roof should have at least 100mm of insulation MINIMUM_FLAT_ROOF_ISULATION_MM = 100 @@ -71,7 +72,7 @@ class RoofRecommendations: # Building regulations part L recommend installing at least 270mm of insulation, however generally we # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation # This only holds true for pitched roofs. - if (insulation_thickness >= self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]: + if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]: return if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]: From db6fd58af4e89dcbdbecd436f2a9328ea6924521 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 10:56:17 +0100 Subject: [PATCH 195/248] changing the logic we use to recommend a combi boiler --- backend/Property.py | 13 +++++++++- recommendations/HeatingRecommender.py | 36 ++++++++++++++++++--------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index d3dd8395..6f2e648d 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -55,7 +55,13 @@ class Property: DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES - def __init__(self, id, postcode, address, epc_record): + # Surplus information, that can be provided as optional inputs, by a customer + n_bathrooms = None + n_bedrooms = None + + def __init__( + self, id, postcode, address, epc_record, **kwargs + ): self.epc_record = epc_record @@ -133,6 +139,11 @@ class Property: self.recommendations_scoring_data = [] + def parse_kwargs(self, kwargs): + # We extract the elements from kwargs that we recognise. Anything additional is ignored + self.n_bathrooms = kwargs.get("n_bathrooms", None) + self.n_bedrooms = kwargs.get("n_bedrooms", None) + def create_base_difference_epc_record(self, cleaned_lookup: dict): """ Creates a EPCDifferenceRecord object, which is used to store the difference between the current and diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 91730053..d4fe0a90 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -34,7 +34,6 @@ class HeatingRecommender: if has_electric_heating_description or no_heating_no_mains: # Recommend high heat retention storage heaters self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False) - return # if the property has mains heating with boiler and radiators, we recommend optimal heating controls has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"] @@ -44,9 +43,16 @@ class HeatingRecommender: 'No system present, electric heaters assumed' ] and self.property.data["mains-gas-flag"] - if has_boiler or no_heating_has_mains: - self.recommend_boiler_upgrades(phase=phase, no_heating_has_mains=no_heating_has_mains) - return + # We also check if the property has electric heating, but it has access to the mains gas + electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"] + + if has_boiler or no_heating_has_mains or electic_heating_has_mains: + # This indicates that the home previously did not have a boiler in place and so would require + # an overhaul to the system + system_change = not has_boiler + self.recommend_boiler_upgrades(phase=phase, system_change=system_change) + + return @staticmethod def check_simulation_difference(old_config, new_config): @@ -256,12 +262,14 @@ class HeatingRecommender: return closest_size - def recommend_boiler_upgrades(self, phase, no_heating_has_mains): + def recommend_boiler_upgrades(self, phase, system_change): """ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system is generally more expensive :param phase: - :param no_heating_has_mains: indicaes if the property has no heating system, but has access to the mains gas + :param system_change: Indicates if the property would be undergoing a heating system change. This could be true + if the home didn't have a heating system in place, or if the home had electric heating + previously :return: """ @@ -279,17 +287,21 @@ class HeatingRecommender: num_heated_rooms=self.property.data["number-heated-rooms"], ) - # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler - hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"] - - is_combi = hotwater_from_mains or no_heating_has_mains + # We recommend a combi boiler under the following conditions + # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be + # heated if there is no existing heating system). + # 2) There is more than 1 bathroom + is_combi = ( + (self.property.data["number-heated-rooms"] <= 4) or + (self.property.n_bathrooms not in [None, 0, 1]) + ) if is_combi: description = "Upgrade to a new combi boiler" else: - description = "Upgrade to a new boiler" + description = "Upgrade to a new gas condensing boiler" simulation_config = {"mainheat_energy_eff_ending": "Good"} - if no_heating_has_mains: + if system_change: # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process() From ac8cf271698788d4479626dae19f09a0027c79aa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 11:20:29 +0100 Subject: [PATCH 196/248] created extract kwargs to read bathrooms and bedrooms --- backend/Property.py | 22 ++++++++++++++++++++++ backend/app/plan/router.py | 1 + 2 files changed, 23 insertions(+) diff --git a/backend/Property.py b/backend/Property.py index 6f2e648d..5fe9716e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -139,6 +139,28 @@ class Property: self.recommendations_scoring_data = [] + @classmethod + def extract_kwargs(cls, kwargs): + """ + This method is to be used in the router, to extract the kwargs from the request and prevent any errors such as + non-integer values, or inputs that clash with the __init__ method of this class + :param kwargs: + :return: + """ + n_bathrooms = kwargs.get("n_bathrooms", None) + if n_bathrooms is not None: + # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5 + n_bathrooms = int(round(n_bathrooms + 1e-5)) + + n_bedrooms = kwargs.get("n_bedrooms", None) + if n_bedrooms is not None: + n_bedrooms = int(round(n_bedrooms + 1e-5)) + + return { + "n_bathrooms": n_bathrooms, + "n_bedrooms": n_bedrooms, + } + def parse_kwargs(self, kwargs): # We extract the elements from kwargs that we recognise. Anything additional is ignored self.n_bathrooms = kwargs.get("n_bathrooms", None) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 6f179c79..7dc11bb9 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -139,6 +139,7 @@ async def trigger_plan(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, + **Property.extract_kwargs(config) ) ) From 2aa2e5947e6d29acf5c82962788a18ad9daf3351 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 11:36:14 +0100 Subject: [PATCH 197/248] adding bedrooms and bathrooms to asset list for immo --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/customers/immo/pilot/asset_list.py | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 7939a555..9756e00b 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -33,6 +33,14 @@ def app(): asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"]) + # We're provided with number of bathrooms and number of bedrooms. + asset_list = asset_list.rename( + columns={ + "No. of Beds": "n_bedrooms", + "No. of WC's": "n_bathrooms" + } + ) + # Store the data in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" save_csv_to_s3( From 606fd3a615e2188f78e2721aef9732e5d0d76328 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 11:49:48 +0100 Subject: [PATCH 198/248] Adding parsing of kwargs to Property class --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/Property.py | 6 ++++-- backend/app/plan/router.py | 20 ++++++++++---------- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/Property.py b/backend/Property.py index 5fe9716e..950c1ac9 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -139,6 +139,8 @@ class Property: self.recommendations_scoring_data = [] + self.parse_kwargs(kwargs) + @classmethod def extract_kwargs(cls, kwargs): """ @@ -150,11 +152,11 @@ class Property: n_bathrooms = kwargs.get("n_bathrooms", None) if n_bathrooms is not None: # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5 - n_bathrooms = int(round(n_bathrooms + 1e-5)) + n_bathrooms = int(round(float(n_bathrooms) + 1e-5)) n_bedrooms = kwargs.get("n_bedrooms", None) if n_bedrooms is not None: - n_bedrooms = int(round(n_bedrooms + 1e-5)) + n_bedrooms = int(round(float(n_bedrooms) + 1e-5)) return { "n_bathrooms": n_bathrooms, diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 7dc11bb9..3cb2027d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -109,16 +109,16 @@ async def trigger_plan(body: PlanTriggerRequest): session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) # if a new record was not created, we don't produduce recommendations - if not is_new: - continue - - create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + # if not is_new: + # continue + # + # create_property_targets( + # session, + # property_id=property_id, + # portfolio_id=body.portfolio_id, + # epc_target=body.goal_value, + # heat_demand_target=None + # ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), From 69424149510c38f59d1d847cbcef740a287da23b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 15:40:50 +0100 Subject: [PATCH 199/248] Updating heating recommender to recommend heating controls, with the heating change --- backend/app/plan/router.py | 21 ++++++++++----------- recommendations/HeatingRecommender.py | 6 +++--- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 3cb2027d..4b91566e 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -108,17 +108,16 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - # if a new record was not created, we don't produduce recommendations - # if not is_new: - # continue - # - # create_property_targets( - # session, - # property_id=property_id, - # portfolio_id=body.portfolio_id, - # epc_target=body.goal_value, - # heat_demand_target=None - # ) + if not is_new: + continue + + create_property_targets( + session, + property_id=property_id, + portfolio_id=body.portfolio_id, + epc_target=body.goal_value, + heat_demand_target=None + ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index d4fe0a90..6e4b2230 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -48,7 +48,7 @@ class HeatingRecommender: if has_boiler or no_heating_has_mains or electic_heating_has_mains: # This indicates that the home previously did not have a boiler in place and so would require - # an overhaul to the system + # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler system_change = not has_boiler self.recommend_boiler_upgrades(phase=phase, system_change=system_change) @@ -353,8 +353,8 @@ class HeatingRecommender: if not controls_recommender.recommendation: return - if no_heating_has_mains: - # We combine the heating and controls recommendations + if system_change: + # We combine the heating and controls recommendations, in the case of a system change boiler_recommendation = self.recommendations[0].copy() combined_recommendations = [] for controls_recommendation in controls_recommender.recommendation: From 014d51c0605e853351b621fbeafdf8ca3b870cbf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 16:09:31 +0100 Subject: [PATCH 200/248] fixing the case where we recommend a boiler and new heating controls, as well as an improved electrical system --- recommendations/HeatingRecommender.py | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 6e4b2230..1813e5e8 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -278,6 +278,7 @@ class HeatingRecommender: # We now recommend boiler upgrades, if applicable simulation_config = {} boiler_costs = {} + boiler_recommendation = {} if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]: boiler_size = self.estimate_boiler_size( property_type=self.property.data["property-type"], @@ -290,10 +291,12 @@ class HeatingRecommender: # We recommend a combi boiler under the following conditions # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be # heated if there is no existing heating system). - # 2) There is more than 1 bathroom + # 2) There 1 or fewer bathrooms + # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple + # bathrooms is_combi = ( (self.property.data["number-heated-rooms"] <= 4) or - (self.property.n_bathrooms not in [None, 0, 1]) + (self.property.n_bathrooms in [None, 0, 1]) ) if is_combi: description = "Upgrade to a new combi boiler" @@ -328,21 +331,19 @@ class HeatingRecommender: boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") - self.recommendations.append( - { - "phase": recommendation_phase, - "parts": [ - # TODO - ], - "type": "heating", - "description": description, - "starting_u_value": None, - "new_u_value": None, - "sap_points": None, - "simulation_config": simulation_config, - **boiler_costs - } - ) + boiler_recommendation = { + "phase": recommendation_phase, + "parts": [ + # TODO + ], + "type": "heating", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + "simulation_config": simulation_config, + **boiler_costs + } # We recommend the heating controls # If the property did not previously have a boiler, we combine @@ -355,7 +356,6 @@ class HeatingRecommender: if system_change: # We combine the heating and controls recommendations, in the case of a system change - boiler_recommendation = self.recommendations[0].copy() combined_recommendations = [] for controls_recommendation in controls_recommender.recommendation: combined_recommendation = self.combine_heating_and_controls( From 88f43bcc822b4550540c88e7363d920937563072 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 16:49:44 +0100 Subject: [PATCH 201/248] fixed the combi boiler logic --- recommendations/HeatingControlRecommender.py | 3 ++- recommendations/HeatingRecommender.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 95b5e3b1..76eaba4f 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -215,7 +215,8 @@ class HeatingControlRecommender: { "type": "heating_control", "parts": [], - "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves", + "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves " + "(time & temperature zone control)", **self.costs.time_and_temperature_zone_control( number_heated_rooms=int(self.property.data["number-heated-rooms"]) ), diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 1813e5e8..bd4d87a2 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -295,7 +295,7 @@ class HeatingRecommender: # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple # bathrooms is_combi = ( - (self.property.data["number-heated-rooms"] <= 4) or + (self.property.data["number-heated-rooms"] <= 4) and (self.property.n_bathrooms in [None, 0, 1]) ) if is_combi: @@ -370,7 +370,7 @@ class HeatingRecommender: combined_recommendations.extend(combined_recommendation) # Overwrite the existing boiler recommendation - self.recommendations = combined_recommendations + self.recommendations.extend(combined_recommendations) else: # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade # but we'll only upgrade if we have a heating recommendation From 61584a6320bfd50bb4f18266a09cc1bb1e4e2ba1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 18:14:38 +0100 Subject: [PATCH 202/248] extend recommendations to cover portable electric heaters --- recommendations/Costs.py | 18 ++++++++++++- recommendations/HeatingRecommender.py | 37 ++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index e5ceb0c0..f4ac259b 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -79,6 +79,10 @@ CONVENTIONAL_BOILER_COSTS = { "40kw": 1776 } +# Assumes 3 hours to remove each heater (including re-decorating) +ROOM_HEATER_REMOVAL_COST = 120 +ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3 + class Costs: """ @@ -1100,7 +1104,7 @@ class Costs: "labour_days": labour_days, } - def low_carbon_boiler(self, is_combi, size): + def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms): """ Based on a basic estimate of median value £2600 to install a low carbon combi boiler :return: @@ -1118,6 +1122,18 @@ class Costs: labour_cost = labour_rate * self.labour_adjustment_factor * labour_days # Add contingency and preliminaries labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES) + + # if there are existing room heaters, we need to add the cost of removing them + if exising_room_heaters: + removal_cost = ROOM_HEATER_REMOVAL_COST * n_heated_rooms + removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_heated_rooms + else: + removal_cost = 0 + removal_labour_hours = 0 + + labour_cost = labour_cost + removal_cost + labour_days = labour_days + (removal_labour_hours / 8) + vat = labour_cost * self.VAT_RATE subtotal_before_vat = unit_cost + labour_cost diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index bd4d87a2..14509eea 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -43,14 +43,36 @@ class HeatingRecommender: 'No system present, electric heaters assumed' ] and self.property.data["mains-gas-flag"] + has_gas_heaters = ( + self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and + self.property.data["mains-gas-flag"] + ) + # We also check if the property has electric heating, but it has access to the mains gas electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"] - if has_boiler or no_heating_has_mains or electic_heating_has_mains: + portable_heaters_has_mains = ( + self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and + self.property.data["mains-gas-flag"] + ) + + if ( + has_boiler or + no_heating_has_mains or + electic_heating_has_mains or + has_gas_heaters or + portable_heaters_has_mains + ): # This indicates that the home previously did not have a boiler in place and so would require # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler system_change = not has_boiler - self.recommend_boiler_upgrades(phase=phase, system_change=system_change) + exising_room_heaters = self.property.main_heating["clean_description"] in [ + "Room heaters, electric", "Room heaters, mains gas" + ] + + self.recommend_boiler_upgrades( + phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters + ) return @@ -262,7 +284,7 @@ class HeatingRecommender: return closest_size - def recommend_boiler_upgrades(self, phase, system_change): + def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters): """ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system is generally more expensive @@ -270,6 +292,8 @@ class HeatingRecommender: :param system_change: Indicates if the property would be undergoing a heating system change. This could be true if the home didn't have a heating system in place, or if the home had electric heating previously + :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler + recommendation will need to be accompanied by removal of the room heaters :return: """ @@ -329,7 +353,12 @@ class HeatingRecommender: "hot_water_energy_eff_ending": "Good" } - boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw") + boiler_costs = self.costs.boiler( + is_combi=is_combi, + size=f"{boiler_size}kw", + exising_room_heaters=exising_room_heaters, + n_heated_rooms=self.property.data["number-heated-rooms"] + ) boiler_recommendation = { "phase": recommendation_phase, From 3ecd7a974276bb6f4296124c6acf7e55f280e574 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Apr 2024 19:14:49 +0100 Subject: [PATCH 203/248] added simulation for secondary heating --- backend/Property.py | 6 ++- recommendations/Costs.py | 45 ++++++++++++++++------ recommendations/HeatingRecommender.py | 2 +- recommendations/Recommendations.py | 8 ++++ recommendations/SecondaryHeating.py | 55 +++++++++++++++++++++++++++ 5 files changed, 102 insertions(+), 14 deletions(-) create mode 100644 recommendations/SecondaryHeating.py diff --git a/backend/Property.py b/backend/Property.py index 950c1ac9..0f5e7e77 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -456,7 +456,9 @@ class Property: "double glazing installed during or after 2002" ) - if recommendation["type"] in ["heating", "hot_water_tank_insulation", "heating_control"]: + if recommendation["type"] in [ + "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating" + ]: # We update the data, as defined in the recommendaton simulation_config = recommendation["simulation_config"] @@ -477,7 +479,7 @@ class Property: "loft_insulation", "room_roof_insulation", "flat_roof_insulation", "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation", "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation", - "heating_control", + "heating_control", "secondary_heating" ]: raise NotImplementedError( "Implement me, given type %s" % recommendation["type"] diff --git a/recommendations/Costs.py b/recommendations/Costs.py index f4ac259b..45c17102 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -1104,6 +1104,28 @@ class Costs: "labour_days": labour_days, } + def heater_removal(self, n_rooms): + """ + Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater + :return: + """ + + removal_cost = ROOM_HEATER_REMOVAL_COST * n_rooms + removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms + + vat = removal_cost * self.VAT_RATE + + subtotal_before_vat = removal_cost + total_cost = subtotal_before_vat + vat + + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": removal_labour_hours, + "labour_days": np.ceil(removal_labour_hours / 8), + } + def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms): """ Based on a basic estimate of median value £2600 to install a low carbon combi boiler @@ -1114,6 +1136,7 @@ class Costs: # The unit cost is the cost without VAT # We now need to estimate the cost of the works labour_days = 2 + labour_hours = labour_days * 8 labour_rate = 500 # Average cost of installation is 1 (maybe 2days) at £300 per day @@ -1123,26 +1146,26 @@ class Costs: # Add contingency and preliminaries labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES) - # if there are existing room heaters, we need to add the cost of removing them - if exising_room_heaters: - removal_cost = ROOM_HEATER_REMOVAL_COST * n_heated_rooms - removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_heated_rooms - else: - removal_cost = 0 - removal_labour_hours = 0 - - labour_cost = labour_cost + removal_cost - labour_days = labour_days + (removal_labour_hours / 8) + # labour_days = labour_days + (removal_labour_hours / 8) vat = labour_cost * self.VAT_RATE subtotal_before_vat = unit_cost + labour_cost total_cost = subtotal_before_vat + vat + # if there are existing room heaters, we need to add the cost of removing them + if exising_room_heaters: + removal_costing = self.heater_removal(n_rooms=n_heated_rooms) + # Add the totals to the existing totals + total_cost += removal_costing["total"] + subtotal_before_vat += removal_costing["subtotal"] + labour_hours += removal_costing["labour_hours"] + labour_days += removal_costing["labour_days"] + return { "total": total_cost, "subtotal": subtotal_before_vat, "vat": vat, - "labour_hours": labour_days * 8, + "labour_hours": labour_hours, "labour_days": labour_days, } diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 14509eea..92457a27 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -319,7 +319,7 @@ class HeatingRecommender: # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple # bathrooms is_combi = ( - (self.property.data["number-heated-rooms"] <= 4) and + (self.property.number_of_rooms <= 4) and (self.property.n_bathrooms in [None, 0, 1]) ) if is_combi: diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 902023dc..68fead16 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -11,6 +11,7 @@ from recommendations.SolarPvRecommendations import SolarPvRecommendations from recommendations.WindowsRecommendations import WindowsRecommendations from recommendations.HeatingRecommender import HeatingRecommender from recommendations.HotwaterRecommendations import HotwaterRecommendations +from recommendations.SecondaryHeating import SecondaryHeating from backend.ml_models.AnnualBillSavings import AnnualBillSavings @@ -46,6 +47,7 @@ class Recommendations: self.solar_recommender = SolarPvRecommendations(property_instance=property_instance) self.heating_recommender = HeatingRecommender(property_instance=property_instance) self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance) + self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance) def recommend(self): @@ -130,6 +132,12 @@ class Recommendations: property_recommendations.append(self.lighting_recommender.recommendation) phase += 1 + if "secondary_heating" not in self.exclusions: + self.secondary_heating_recommender.recommend(phase=phase) + if self.secondary_heating_recommender.recommendation: + property_recommendations.append(self.secondary_heating_recommender.recommendation) + phase += 1 + # Renewables if "solar_pv" not in self.exclusions: self.solar_recommender.recommend(phase=phase) diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py new file mode 100644 index 00000000..f31c4c05 --- /dev/null +++ b/recommendations/SecondaryHeating.py @@ -0,0 +1,55 @@ +from recommendations.Costs import Costs +from backend.Property import Property + + +class SecondaryHeating: + """ + This class recommends the removal of the secondary heating system for properties that have a primary heating + system. + """ + + # The list of existing heating systems that are accepted + ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"] + ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"] + # These are the heaters where works are required to remove them + FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"] + + def __init__(self, property_instance: Property): + self.property = property_instance + self.costs = Costs(self.property) + + self.recommendation = [] + + def recommend(self, phase: int): + # Reset + self.recommendation = [] + + if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS: + return + + # TODO: We need to clean secondary data + if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS: + return + + if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS: + # We have an associated cost otherwise, there is no cost + n_rooms = self.property.data['number-heated-rooms'] + else: + n_rooms = 0 + + costs = self.costs.heater_removal(n_rooms=n_rooms) + self.recommendation.append( + { + "phase": phase, + "parts": [], + "type": "secondary_heating", + "description": "Remove the secondary heating system", + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + **costs, + "simulation_config": { + "secondheat_description_ending": "None" + } + } + ) From 0b75ec9210e7c7c097bf4e6b5d2d87cb273af6cd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 14:41:19 +0100 Subject: [PATCH 204/248] Added patches and overrides to immo asset list --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../AirSourceHeatPumpEfficiency.py | 78 +++++++++++++++++++ etl/air_source_heat_pump/app.py | 24 ++++++ etl/customers/immo/pilot/asset_list.py | 70 ++++++++++++++++- 5 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py create mode 100644 etl/air_source_heat_pump/app.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py new file mode 100644 index 00000000..2ba82e77 --- /dev/null +++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py @@ -0,0 +1,78 @@ +import pandas as pd +from tqdm import tqdm +from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet +from utils.logger import setup_logger +from etl.epc.settings import EARLIEST_EPC_DATE + +logger = setup_logger() + + +class AirSourceHeatPumpEfficiency: + + def __init__(self, file_directories, cleaned_lookup): + """ + :param file_directories: A list of directories where files are stored. + :param cleaned_lookup: A dictionary containing cleaned lookup data. + """ + self.file_directories = file_directories + self.cleaned_lookup = cleaned_lookup + + self.results = [] + + def create_dataset(self): + logger.info("Creating solar photo supply dataset") + for dir in tqdm(self.file_directories): + filepath = dir / "certificates.csv" + df = pd.read_csv(filepath, low_memory=False) + df = df[~pd.isnull(df["UPRN"])] + df["UPRN"] = df["UPRN"].astype(int).astype(str) + # Take entries after SAP12 + df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"]) + df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE] + + df = df[ + ~df["TENURE"].isin( + [ + "unknown", + "Not defined - use in the case of a new dwelling for which the intended tenure in not known. " + "It is not to be used for an existing dwelling" + ] + ) + ] + + # Take entries that contain an air source heat pump + df = df[ + df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False) + ] + # Get the columns we're interested in + df = df[ + [ + "MAINHEAT_DESCRIPTION", + "MAINHEAT_ENERGY_EFF", + "MAINHEATCONT_DESCRIPTION", + "MAINHEATC_ENERGY_EFF", + "MAIN_FUEL", + "HOTWATER_DESCRIPTION", + "HOT_WATER_ENERGY_EFF", + "MAINS_GAS_FLAG" + ] + ] + + counts = df.groupby( + [ + "MAINHEAT_DESCRIPTION", + "MAINHEAT_ENERGY_EFF", + "MAINHEATCONT_DESCRIPTION", + "MAINHEATC_ENERGY_EFF", + "MAIN_FUEL", + "HOTWATER_DESCRIPTION", + "HOT_WATER_ENERGY_EFF", + "MAINS_GAS_FLAG" + ] + ).size().reset_index(name="count") + + # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA + for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: + df = df[~pd.isnull(df[col])] + # Take newest LODGEMENT_DATE per UPRN + df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"]) diff --git a/etl/air_source_heat_pump/app.py b/etl/air_source_heat_pump/app.py new file mode 100644 index 00000000..ac87b34b --- /dev/null +++ b/etl/air_source_heat_pump/app.py @@ -0,0 +1,24 @@ +from pathlib import Path +from backend.app.plan.utils import get_cleaned +from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency + +DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" + + +def app(): + """ + This code reads in the EPC dataset and looks at the efficiency values for heating systems that inclue air source + heat pumps. This dataset is then used to inform the recommendations for the air source heat pump, so we know + how to set the simulation + :return: + """ + + directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] + cleaned_lookup = get_cleaned() + + ashp_data_client = AirSourceHeatPumpEfficiency( + file_directories=directories, + cleaned_lookup=cleaned_lookup + ) + + ashp_data_client.create_dataset() diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 9756e00b..0da8f885 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -19,6 +19,40 @@ council_tax_bands = [ ] council_tax_bands = pd.DataFrame(council_tax_bands) +# This is information we need to override on the EPC itself, for instance if a new survey has been conducted and +# that has not reached the API +patches = [ + { + 'address': '6 Beech Road', 'postcode': 'DY1 4BP', + 'walls-description': 'Mixed: Filled cavity and external insulated solid brick', + 'walls-energy-eff': 'Good', + 'roof-description': 'Pitched, 12 mm loft insulation', + 'roof-energy-eff': 'Very Poor', + 'windows-description': 'Fully double glazed', + 'windows-energy-eff': 'Good', + 'mainheat-description': 'Room heaters, electric', + 'mainheat-energy-eff': 'Very Poor', + 'mainheatcont-description': 'Appliance thermostats', + 'mainheatc-energy-eff': 'Good', + 'lighting-description': 'Low energy lighting in 25% of fixed outlets', + 'lighting-energy-eff': 'Good', + 'floor-description': 'Mixed: Solid no insulation and suspended no insulation', + 'secondheat-description': 'None', + 'current-energy-efficiency': '32', + } +] + +# This is information that is found as a result of the non-invasives, that mean that certain measures +# have been installed already. To reflect this in the front end, it is included in the recommendation, however +# the cost is removed and instead, a message is presented saying that the measure is already installed. +overrides = [ + { + 'address': '5 Oaklands', + 'postcode': 'B62 0JA', + "overrides": ["windows_glazing"] + } +] + def app(): raw_asset_list = read_excel_from_s3( @@ -41,7 +75,7 @@ def app(): } ) - # Store the data in s3 + # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" save_csv_to_s3( dataframe=asset_list, @@ -49,12 +83,44 @@ def app(): file_name=filename ) + # Store overrides in s3 + overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json" + save_csv_to_s3( + dataframe=pd.DataFrame(overrides), + bucket_name="retrofit-plan-inputs-dev", + file_name=overrides_filename + ) + + # Store patches in s3 + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json" + save_csv_to_s3( + dataframe=pd.DataFrame(patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + + # EPC C portoflio body = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", "goal": "Increase EPC", - "goal_value": "A", + "goal_value": "C", "trigger_file_path": filename, + "overrides_file_path": overrides_filename, + "patches_file_path": patches_filename, + "budget": None, + } + print(body) + + # EPC B portoflio + body = { + "portfolio_id": str(PORTFOLIO_ID + 1), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "overrides_file_path": overrides_filename, + "patches_file_path": patches_filename, "budget": None, } print(body) From ab180f65225507c6d666516fd70259a7c0ec4ac5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:06:12 +0100 Subject: [PATCH 205/248] Added overrides and patches to router --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/Property.py | 6 ++++- backend/app/plan/router.py | 34 ++++++++++++++++++-------- backend/app/plan/schemas.py | 2 ++ etl/customers/immo/pilot/asset_list.py | 4 +-- 6 files changed, 35 insertions(+), 15 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/Property.py b/backend/Property.py index 0f5e7e77..882e450c 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -60,7 +60,7 @@ class Property: n_bedrooms = None def __init__( - self, id, postcode, address, epc_record, **kwargs + self, id, postcode, address, epc_record, overrides=None, **kwargs ): self.epc_record = epc_record @@ -74,6 +74,10 @@ class Property: } self.old_data = epc_record.get("old_data") self.property_dimensions = None + # This is a list of measures that have already been installed in the property, typically found as a result + # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the + # cost and instead, provide a message that the measure has already been installed + self.overrides = overrides self.uprn = epc_record.get("uprn") self.full_sap_epc = epc_record.get("full_sap_epc") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4b91566e..8d39c97f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -44,20 +44,15 @@ BATCH_SIZE = 5 SCORING_BATCH_SIZE = 400 -def patch_epc(config, epc_records): +def patch_epc(patch, epc_records): """ This utility function is useful to patch the epc data if we have data from the customer :return: """ - number_habitable_rooms = config.get("number-habitable-rooms", None) - number_heated_rooms = config.get("number-heated-rooms", None) - - if number_habitable_rooms is not None: - epc_records["original_epc"]["number-habitable-rooms"] = int(number_habitable_rooms) - - if number_heated_rooms is not None: - epc_records["original_epc"]["number-heated-rooms"] = int(number_heated_rooms) + for patch_variable, patch_value in patch.items(): + if patch_variable in epc_records["original_epc"]: + epc_records["original_epc"][patch_variable] = patch_value return epc_records @@ -85,6 +80,17 @@ async def trigger_plan(body: PlanTriggerRequest): session.begin() logger.info("Getting the inputs") plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) + # If we have patches or overrides, we should read them in here + patches = [] + if body.patches_file_path: + patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path) + + overrides = [] + if body.overrides_file_path: + overrides = read_csv_from_s3( + bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.overrides_file_path + ) + cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) @@ -124,7 +130,11 @@ async def trigger_plan(body: PlanTriggerRequest): 'full_sap_epc': epc_searcher.full_sap_epc.copy(), 'old_data': epc_searcher.older_epcs.copy(), } - epc_records = patch_epc(config, epc_records) + + patch = next(( + x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), None) + epc_records = patch_epc(patch, epc_records) prepared_epc = EPCRecord( epc_records=epc_records, @@ -132,12 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest): cleaning_data=cleaning_data ) + overrides = next(( + x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), None) input_properties.append( Property( id=property_id, address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, + overrides=overrides, **Property.extract_kwargs(config) ) ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index b8a99704..ec49e41e 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -9,6 +9,8 @@ class PlanTriggerRequest(BaseModel): goal_value: str portfolio_id: int trigger_file_path: str + overrides_file_path: Optional[str] = None + patches_file_path: Optional[str] = None exclusions: Optional[conlist(str, min_items=1)] = None # Pre-defined list of possibilities for exclusions diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 0da8f885..15681d42 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -24,7 +24,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands) patches = [ { 'address': '6 Beech Road', 'postcode': 'DY1 4BP', - 'walls-description': 'Mixed: Filled cavity and external insulated solid brick', + 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good', 'roof-description': 'Pitched, 12 mm loft insulation', 'roof-energy-eff': 'Very Poor', @@ -36,7 +36,7 @@ patches = [ 'mainheatc-energy-eff': 'Good', 'lighting-description': 'Low energy lighting in 25% of fixed outlets', 'lighting-energy-eff': 'Good', - 'floor-description': 'Mixed: Solid no insulation and suspended no insulation', + 'floor-description': 'Solid, no insulation (assumed)', 'secondheat-description': 'None', 'current-energy-efficiency': '32', } From 8e2d823693f53ad47a4fe857fd8f24d84c0c4ec1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:11:51 +0100 Subject: [PATCH 206/248] corrected parsing of overrides --- backend/Property.py | 4 ++-- backend/app/plan/router.py | 8 ++++---- etl/customers/immo/pilot/asset_list.py | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 882e450c..3fac3667 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -60,7 +60,7 @@ class Property: n_bedrooms = None def __init__( - self, id, postcode, address, epc_record, overrides=None, **kwargs + self, id, postcode, address, epc_record, override=None, **kwargs ): self.epc_record = epc_record @@ -77,7 +77,7 @@ class Property: # This is a list of measures that have already been installed in the property, typically found as a result # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the # cost and instead, provide a message that the measure has already been installed - self.overrides = overrides + self.override = override self.uprn = epc_record.get("uprn") self.full_sap_epc = epc_record.get("full_sap_epc") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 8d39c97f..08ce0dcc 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -133,7 +133,7 @@ async def trigger_plan(body: PlanTriggerRequest): patch = next(( x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), None) + ), {}) epc_records = patch_epc(patch, epc_records) prepared_epc = EPCRecord( @@ -142,16 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest): cleaning_data=cleaning_data ) - overrides = next(( + override = next(( x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), None) + ), {}) input_properties.append( Property( id=property_id, address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - overrides=overrides, + override=override, **Property.extract_kwargs(config) ) ) diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 15681d42..07ebe884 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -39,6 +39,9 @@ patches = [ 'floor-description': 'Solid, no insulation (assumed)', 'secondheat-description': 'None', 'current-energy-efficiency': '32', + 'energy-consumption-current': '491', + 'co2-emissions-current': '5.0', + 'potential-energy-efficiency': '87' } ] From 0ede95cc4a7499ad0db1c6eda5ef6e012ab9f763 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:25:08 +0100 Subject: [PATCH 207/248] added override to wall insulation --- backend/Property.py | 4 +++- recommendations/WallRecommendations.py | 15 ++++++++++++++- recommendations/recommendation_utils.py | 12 ++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 3fac3667..d000be28 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1,4 +1,5 @@ import os +import ast from itertools import groupby import pandas as pd @@ -77,7 +78,8 @@ class Property: # This is a list of measures that have already been installed in the property, typically found as a result # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the # cost and instead, provide a message that the measure has already been installed - self.override = override + + self.override = ast.literal_eval(override['overrides']) if override is not None else [] self.uprn = epc_record.get("uprn") self.full_sap_epc = epc_record.get("full_sap_epc") diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 6b59c148..3acc17f0 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -8,7 +8,7 @@ from backend.Property import Property from BaseUtility import Definitions from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, - get_recommended_part, get_wall_u_value + get_recommended_part, get_wall_u_value, override_costs ) from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION from recommendations.Costs import Costs @@ -221,6 +221,10 @@ class WallRecommendations(Definitions): material=material.to_dict(), ) + is_override = "cavity_wall_insulation" in cost_result + if is_override: + cost_result = override_costs(cost_result) + recommendations.append( { "phase": phase, @@ -237,6 +241,7 @@ class WallRecommendations(Definitions): "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, + "is_override": is_override, **cost_result } ) @@ -277,12 +282,19 @@ class WallRecommendations(Definitions): material=material.to_dict(), non_insulation_materials=non_insulation_materials ) + is_override = "internal_wall_insulation" in cost_result + if is_override: + cost_result = override_costs(cost_result) + elif material["type"] == "external_wall_insulation": cost_result = self.costs.external_wall_insulation( wall_area=self.property.insulation_wall_area, material=material.to_dict(), non_insulation_materials=non_insulation_materials ) + is_override = "external_wall_insulation" in cost_result + if is_override: + cost_result = override_costs(cost_result) else: raise ValueError("Invalid material type") @@ -301,6 +313,7 @@ class WallRecommendations(Definitions): "description": self._make_description(material), "starting_u_value": u_value, "new_u_value": new_u_value, + "is_override": is_override, "sap_points": None, **cost_result } diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 0d5f9743..a3043c31 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -767,3 +767,15 @@ def check_simulation_difference(old_config, new_config): differences = {key + "_ending": new_config[key] for key in new_config if old_config[key] != new_config[key]} return differences + + +def override_costs(costs): + """ + If the method is overridden, we want to make sure that the costs are zero. This function sets the costs to zero + :param costs: Dictionary of costing, as returned by the Costs class + :return: + """ + for k in costs: + costs[k] = 0 + + return costs From 1c5ccb2c8c46a613851dfaf153a16ee4242eaf0a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:26:11 +0100 Subject: [PATCH 208/248] added override to roof insulation --- recommendations/RoofRecommendations.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 8d6a91e7..ed087228 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -5,7 +5,7 @@ from typing import List from datatypes.enums import QuantityUnits from recommendations.recommendation_utils import ( get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, - update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric + update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs ) from recommendations.Costs import Costs @@ -207,12 +207,18 @@ class RoofRecommendations: floor_area=self.property.insulation_floor_area, material=material ) + is_override = "loft_insulation" in cost_result + if is_override: + cost_result = override_costs(cost_result) elif material["type"] == "flat_roof_insulation": cost_result = self.costs.flat_roof_insulation( floor_area=self.property.insulation_floor_area, material=material, non_insulation_materials=non_insulation_materials ) + is_override = "flat_roof_insulation" in cost_result + if is_override: + cost_result = override_costs(cost_result) else: raise ValueError("Invalid material type") @@ -232,6 +238,7 @@ class RoofRecommendations: "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, + "is_override": is_override, **cost_result } ) From adcd31c8f4e69e92ff592a03103eb60f1c06617a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:27:58 +0100 Subject: [PATCH 209/248] correcting override in walls and roof --- recommendations/RoofRecommendations.py | 4 ++-- recommendations/VentilationRecommendations.py | 4 ++++ recommendations/WallRecommendations.py | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index ed087228..5ba7e82e 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -207,7 +207,7 @@ class RoofRecommendations: floor_area=self.property.insulation_floor_area, material=material ) - is_override = "loft_insulation" in cost_result + is_override = "loft_insulation" in self.property.override if is_override: cost_result = override_costs(cost_result) elif material["type"] == "flat_roof_insulation": @@ -216,7 +216,7 @@ class RoofRecommendations: material=material, non_insulation_materials=non_insulation_materials ) - is_override = "flat_roof_insulation" in cost_result + is_override = "flat_roof_insulation" in self.property.override if is_override: cost_result = override_costs(cost_result) else: diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index 1657b759..aa6299e0 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -56,6 +56,10 @@ class VentilationRecommendations(Definitions): part[0]["quantity"] = n_units part[0]["quantity_unit"] = "part" + is_override = "cavity_wall_insulation" in cost_result + if is_override: + cost_result = override_costs(cost_result) + # We recommend installing two mechanical ventilation systems self.recommendation = [ { diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 3acc17f0..471a62cb 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -221,7 +221,7 @@ class WallRecommendations(Definitions): material=material.to_dict(), ) - is_override = "cavity_wall_insulation" in cost_result + is_override = "cavity_wall_insulation" in self.property.override if is_override: cost_result = override_costs(cost_result) @@ -282,7 +282,7 @@ class WallRecommendations(Definitions): material=material.to_dict(), non_insulation_materials=non_insulation_materials ) - is_override = "internal_wall_insulation" in cost_result + is_override = "internal_wall_insulation" in self.property.override if is_override: cost_result = override_costs(cost_result) @@ -292,7 +292,7 @@ class WallRecommendations(Definitions): material=material.to_dict(), non_insulation_materials=non_insulation_materials ) - is_override = "external_wall_insulation" in cost_result + is_override = "external_wall_insulation" in self.property.override if is_override: cost_result = override_costs(cost_result) else: From fadff714d2c3227eb835b94951ed09b25ff870c4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:29:41 +0100 Subject: [PATCH 210/248] add override to ventilation --- recommendations/VentilationRecommendations.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index aa6299e0..07f7cf1e 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -50,16 +50,16 @@ class VentilationRecommendations(Definitions): part = self.materials.copy() - estimated_cost = n_units * part[0]["cost"] + is_override = "cavity_wall_insulation" in self.property.override + + estimated_cost = n_units * part[0]["cost"] if not is_override else 0 + labour_hours = 4 * n_units if not is_override else 0 + labour_days = 4 * n_units / 8.0 if not is_override else 0 part[0]["total"] = estimated_cost part[0]["quantity"] = n_units part[0]["quantity_unit"] = "part" - is_override = "cavity_wall_insulation" in cost_result - if is_override: - cost_result = override_costs(cost_result) - # We recommend installing two mechanical ventilation systems self.recommendation = [ { @@ -76,7 +76,7 @@ class VentilationRecommendations(Definitions): "energy_cost_savings": 0, "total": estimated_cost, # We use a very simple and rough estimate of 4 hours per unit - "labour_hours": 4 * n_units, - "labour_days": 4 * n_units / 8.0 # Assume 8 hour day + "labour_hours": labour_hours, + "labour_days": labour_days # Assume 8 hour day } ] From 493db6c4a01dcf825fe49d77cfc8fcb974a7d1e1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:31:07 +0100 Subject: [PATCH 211/248] added floor insulation to override --- recommendations/FloorRecommendations.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index 713d5f92..1744a928 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -8,7 +8,7 @@ from datatypes.enums import QuantityUnits from backend.Property import Property from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, - get_recommended_part, get_floor_u_value + get_recommended_part, get_floor_u_value, override_costs ) from recommendations.Costs import Costs @@ -192,12 +192,22 @@ class FloorRecommendations(Definitions): material=material.to_dict(), non_insulation_materials=non_insulation_materials ) + + is_override = "suspended_floor_insulation" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + elif material["type"] == "solid_floor_insulation": cost_result = self.costs.solid_floor_insulation( insulation_floor_area=self.property.insulation_floor_area, material=material.to_dict(), non_insulation_materials=non_insulation_materials ) + + is_override = "solid_floor_insulation" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + else: raise NotImplementedError("Implement me!") From b052c9925f9064d2462442cccecac08bc511cc21 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:41:52 +0100 Subject: [PATCH 212/248] Added heating override --- recommendations/FireplaceRecommendations.py | 4 ++- recommendations/FloorRecommendations.py | 2 +- recommendations/HeatingRecommender.py | 22 ++++++++++++----- recommendations/WindowsRecommendations.py | 27 +++++++++++++-------- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py index 5d620d49..c1114f31 100644 --- a/recommendations/FireplaceRecommendations.py +++ b/recommendations/FireplaceRecommendations.py @@ -32,7 +32,8 @@ class FireplaceRecommendations(Definitions): if number_open_fireplaces == 0: return - estimated_cost = number_open_fireplaces * self.COST_OF_WORK + is_override = "sealing_open_fireplace" in self.property.override + estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not is_override else 0 # We recommend installing two mechanical ventilation systems self.recommendation = [ @@ -44,6 +45,7 @@ class FireplaceRecommendations(Definitions): "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, "total": estimated_cost, # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal "labour_hours": 6 * number_open_fireplaces, diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index 1744a928..b7bd370c 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -207,7 +207,6 @@ class FloorRecommendations(Definitions): is_override = "solid_floor_insulation" in self.property.override if is_override: cost_result = override_costs(cost_result) - else: raise NotImplementedError("Implement me!") @@ -227,6 +226,7 @@ class FloorRecommendations(Definitions): "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, + "is_override": is_override, **cost_result } ) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 92457a27..27e4985a 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1,7 +1,7 @@ import pandas as pd from recommendations.Costs import Costs -from recommendations.recommendation_utils import check_simulation_difference +from recommendations.recommendation_utils import check_simulation_difference, override_costs from backend.Property import Property from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes @@ -33,7 +33,7 @@ class HeatingRecommender: if has_electric_heating_description or no_heating_no_mains: # Recommend high heat retention storage heaters - self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False) + self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False) # if the property has mains heating with boiler and radiators, we recommend optimal heating controls has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"] @@ -89,9 +89,8 @@ class HeatingRecommender: return differences - @staticmethod def combine_heating_and_controls( - controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only, + self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only, system_change ): """ @@ -140,6 +139,11 @@ class HeatingRecommender: recommendation_description = f"{description} and {controls_description}" + is_override = "cavity_wall_insulation" in self.property.override + if is_override: + total_costs = override_costs(total_costs) + recommendation_description = "Heating system has already been upgraded, no further action needed." + recommendation = { "phase": phase, "parts": [ @@ -150,6 +154,7 @@ class HeatingRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, **total_costs, "simulation_config": recommendation_simulation_config } @@ -181,9 +186,8 @@ class HeatingRecommender: return output - def recommend_electric_storage_heaters(self, phase, system_change, heating_controls_only): + def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only): """ - We recommend electric storage heaters as an upgrade to the heating system. We will recommend upgrading to a high heat retention storage system, if the current system is not already high heat retention storage @@ -360,6 +364,11 @@ class HeatingRecommender: n_heated_rooms=self.property.data["number-heated-rooms"] ) + is_override = "heating" in self.property.override + if is_override: + boiler_costs = override_costs(boiler_costs) + description = "Heating system has already been upgraded, no further action needed." + boiler_recommendation = { "phase": recommendation_phase, "parts": [ @@ -370,6 +379,7 @@ class HeatingRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, "simulation_config": simulation_config, **boiler_costs } diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index d7404e3b..b2fe20a6 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -4,6 +4,7 @@ import numpy as np from backend.Property import Property from recommendations.Costs import Costs +from recommendation_utils import override_costs class WindowsRecommendations: @@ -70,18 +71,23 @@ class WindowsRecommendations: is_secondary_glazing=is_secondary_glazing ) - glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing" - if self.property.windows["glazing_coverage"] in ["partial", "most"]: - description = f"Install {glazing_type} to the remaining windows" + is_override = "windows_glazing" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + description = "The property already has double glazing installed. No further action is required." else: - description = f"Install {glazing_type} to all windows" + glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing" + if self.property.windows["glazing_coverage"] in ["partial", "most"]: + description = f"Install {glazing_type} to the remaining windows" + else: + description = f"Install {glazing_type} to all windows" - if self.property.is_listed: - description += ". Secondary glazing recommended due to listed building status" - elif self.property.is_heritage: - description += ". Secondary glazing recommended due to herigate building status" - elif self.property.in_conservation_area: - description += ". Secondary glazing recommended due to conservation area status" + if self.property.is_listed: + description += ". Secondary glazing recommended due to listed building status" + elif self.property.is_heritage: + description += ". Secondary glazing recommended due to herigate building status" + elif self.property.in_conservation_area: + description += ". Secondary glazing recommended due to conservation area status" self.recommendation = [ { @@ -92,6 +98,7 @@ class WindowsRecommendations: "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, **cost_result, "is_secondary_glazing": is_secondary_glazing } From 1ee115fa7e73f170d559a24026680677f89aaf5d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 15:48:44 +0100 Subject: [PATCH 213/248] Added overrides --- recommendations/HotwaterRecommendations.py | 11 ++++++++++- recommendations/LightingRecommendations.py | 7 +++++++ recommendations/SecondaryHeating.py | 12 +++++++++++- recommendations/SolarPvRecommendations.py | 6 ++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py index 7f77597f..88cfa932 100644 --- a/recommendations/HotwaterRecommendations.py +++ b/recommendations/HotwaterRecommendations.py @@ -1,5 +1,6 @@ from backend.Property import Property from recommendations.Costs import Costs +from recommendations.recommendation_utils import override_costs class HotwaterRecommendations: @@ -41,6 +42,13 @@ class HotwaterRecommendations: recommendation_cost = self.costs.hot_water_tank_insulation() + is_override = "hot_water_tank_insulation" in self.property.override + if is_override: + recommendation_cost = override_costs(recommendation_cost) + description = "Insulation tank has already been insulated, no further action required" + else: + description = "Insulate hot water tank" + self.recommendations.append( { "phase": phase, @@ -48,10 +56,11 @@ class HotwaterRecommendations: # TODO ], "type": "hot_water_tank_insulation", - "description": "Insulate the hot water tank with an insulation jacket", + "description": description, "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, **recommendation_cost, "simulation_config": {"hot_water_energy_eff_ending": "Average"} } diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py index 352c4d8a..9e4c8e43 100644 --- a/recommendations/LightingRecommendations.py +++ b/recommendations/LightingRecommendations.py @@ -1,6 +1,7 @@ from backend.Property import Property from typing import List from recommendations.Costs import Costs +from recommendations.recommendation_utils import override_costs class LightingRecommendations: @@ -91,6 +92,11 @@ class LightingRecommendations: heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets) + is_override = "low_energy_lighting" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + description = "Low energy lighting has already been installed, no further action required" + self.recommendation = [ { "phase": phase, @@ -99,6 +105,7 @@ class LightingRecommendations: "description": description, "starting_u_value": None, "new_u_value": None, + "is_override": is_override, # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to # the proportion of lights that will be set to low energy "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2), diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py index f31c4c05..e426977e 100644 --- a/recommendations/SecondaryHeating.py +++ b/recommendations/SecondaryHeating.py @@ -1,4 +1,5 @@ from recommendations.Costs import Costs +from recommendations.recommendation_utils import override_costs from backend.Property import Property @@ -38,15 +39,24 @@ class SecondaryHeating: n_rooms = 0 costs = self.costs.heater_removal(n_rooms=n_rooms) + + is_override = "secondary_heating" in self.property.override + if is_override: + costs = override_costs(costs) + description = "Secondary heating system has already been removed, no further action required" + else: + description = "Remove the secondary heating system" + self.recommendation.append( { "phase": phase, "parts": [], "type": "secondary_heating", - "description": "Remove the secondary heating system", + "description": description, "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, **costs, "simulation_config": { "secondheat_description_ending": "None" diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index f75003ce..72fcdf4b 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -1,5 +1,6 @@ import numpy as np from recommendations.Costs import Costs +from recommendations.recommendation_utils import override_costs class SolarPvRecommendations: @@ -110,6 +111,10 @@ class SolarPvRecommendations: description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p" f"anel system on {round(roof_coverage_percent)}% the roof.") + is_override = "solar_pv" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + self.recommendation.append( { "phase": phase, @@ -119,6 +124,7 @@ class SolarPvRecommendations: "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, **cost_result, # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale # back up here From 14a1f35fb16cbf1199afbd66ce50f598b5d7a10b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 16:27:26 +0100 Subject: [PATCH 214/248] ammended system change costs for first time central heating --- recommendations/Costs.py | 72 +++++++++++++++++++++++++-- recommendations/HeatingRecommender.py | 9 +++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 45c17102..0e67b352 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -83,6 +83,14 @@ CONVENTIONAL_BOILER_COSTS = { ROOM_HEATER_REMOVAL_COST = 120 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3 +# This is a cost quoted by Jim for a system flush - existig system will run more efficiently +SYSTEM_FLUSH_COST = 250 + +SINGLE_RADIATOR_COST = 150 +DOUBLE_RADIATOR_COST = 300 +FLUE_COST = 600 +PIPEWORK_COST = 750 # Min cost is £500 + class Costs: """ @@ -1126,9 +1134,45 @@ class Costs: "labour_days": np.ceil(removal_labour_hours / 8), } - def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms): + @staticmethod + def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form): + # Base number of radiators: one per habitable room + base_radiators = number_habitable_rooms + + # Additional radiators for non-habitable essential areas (e.g., kitchens, hallways) + additional_radiators = 3 # Initial assumption + + # Adjust additional radiators based on property type + if property_type == 'Flat': + additional_radiators -= 1 # Flats may need fewer radiators due to less exposure + elif property_type in ['House', 'Bungalow', 'Maisonette']: + # Multiple floors in Maisonette may require additional heating points + additional_radiators += 2 # Houses and bungalows might need more due to greater exposure + else: + raise Exception("Invalid property type") + + # Adjust total radiator needs based on built form + form_factor = { + 'Mid-Terrace': 0.95, + 'Semi-Detached': 1.05, + 'Detached': 1.25, + 'End-Terrace': 1.05 + } + + # Calculate total heating power needed and number of radiators based on standard output + total_heating_power_required = total_floor_area * 80 # Watts per square meter + radiator_output = 1000 # Average wattage per radiator + total_radiators_based_on_power = (total_heating_power_required / radiator_output) * form_factor[built_form] + + # Final estimation taking the higher of calculated needs or base room count + estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators) + return round(estimated_radiators) + + def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms): """ Based on a basic estimate of median value £2600 to install a low carbon combi boiler + First time central heating vosts can als be found here: + https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/ :return: """ @@ -1137,11 +1181,11 @@ class Costs: # We now need to estimate the cost of the works labour_days = 2 labour_hours = labour_days * 8 - labour_rate = 500 + labour_rate = 300 # Average cost of installation is 1 (maybe 2days) at £300 per day # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/ - # To be pessimistic, assume 2 days work and £500 day rate + # To be pessimistic, assume 2 days work labour_cost = labour_rate * self.labour_adjustment_factor * labour_days # Add contingency and preliminaries labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES) @@ -1161,6 +1205,28 @@ class Costs: subtotal_before_vat += removal_costing["subtotal"] labour_hours += removal_costing["labour_hours"] labour_days += removal_costing["labour_days"] + vat += removal_costing["vat"] + + if system_change: + # We need the cost of radiators + n_radiators = self._estimate_n_radiators( + number_habitable_rooms=n_rooms, + total_floor_area=self.property.floor_area, + property_type=self.property.data["property-type"], + built_form=self.property.data["built-form"] + ) + + additionals_labour_cost = labour_rate * self.labour_adjustment_factor + radiator_cost = DOUBLE_RADIATOR_COST * n_radiators + system_change_cost = radiator_cost + FLUE_COST + PIPEWORK_COST + additionals_labour_cost + system_change_cost_before_vat = system_change_cost / (1 + self.VAT_RATE) + system_change_vat = system_change_cost - system_change_cost_before_vat + # We add an extra labour day for the system change + labour_days += 1 + labour_hours += 8 + total_cost += system_change_cost + subtotal_before_vat += system_change_cost_before_vat + vat += system_change_vat return { "total": total_cost, diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 27e4985a..d83b755e 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -18,6 +18,11 @@ class HeatingRecommender: self.recommendations = [] def recommend(self, phase=0): + + # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace + # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this + # in the Costs class, stored as SYSTEM_FLUSH_COST + self.recommendations = [] # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system @@ -361,7 +366,9 @@ class HeatingRecommender: is_combi=is_combi, size=f"{boiler_size}kw", exising_room_heaters=exising_room_heaters, - n_heated_rooms=self.property.data["number-heated-rooms"] + system_change=system_change, + n_heated_rooms=self.property.data["number-heated-rooms"], + n_rooms=self.property.number_of_rooms ) is_override = "heating" in self.property.override From 94f9979f561c5a64acea1fc871c38a9d4868f8e0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 16:31:11 +0100 Subject: [PATCH 215/248] fixed override bug --- backend/Property.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/Property.py b/backend/Property.py index d000be28..2892b86e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -79,7 +79,7 @@ class Property: # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the # cost and instead, provide a message that the measure has already been installed - self.override = ast.literal_eval(override['overrides']) if override is not None else [] + self.override = ast.literal_eval(override['overrides']) if override else [] self.uprn = epc_record.get("uprn") self.full_sap_epc = epc_record.get("full_sap_epc") From d8caacae97006638aed112e7c8682a0a23372690 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Apr 2024 17:46:06 +0100 Subject: [PATCH 216/248] creating non-invasive survey results WIP --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/customers/immo/pilot/non_invasive.py | 131 ++++++++++++++++++++++ etl/customers/immo/pilot/requirements.txt | 1 + 4 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 etl/customers/immo/pilot/non_invasive.py create mode 100644 etl/customers/immo/pilot/requirements.txt diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py new file mode 100644 index 00000000..cb978059 --- /dev/null +++ b/etl/customers/immo/pilot/non_invasive.py @@ -0,0 +1,131 @@ +import extract_msg + + +def parse_msg_body(text): + # Split the text into lines + lines = text.split('\r\n') + + # Dictionary to hold the parsed data + data = {} + + # Process each line + for line in lines: + # Remove all asterisks and extra whitespace + clean_line = line.replace('*', '').strip() + + if clean_line: # Ensure the line is not empty after cleaning + # Attempt to split clean '=' if present + if '=' in clean_line: + clean_line = clean_line.replace(' = ', ': ') + + # Use line content as a key with a default value indicating presence + # Generate a unique key for lines without '=' + data[f"Info{len(data) + 1}"] = clean_line + + return data + + +def app(): + """ + This code retrieves the results of the non-invasive surveys, to be stored in S3 + :return: + """ + + # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/5 Oaklands B62 " + # "0JA/Immo - 5 Oaklands Halesowen B62 0JA.msg") + # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/6 Beech Rd DY1 " + # "4BP/IMMO - 6 Beech Road Dudley DY1 4BP.msg") + # filepath = ( + # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/8 Corporation Rd DY2 " + # "7PX/IMMO - 8 Corporation Road Dudley DY2 7PX.msg" + # ) + # filepath = ( + # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/" + # "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg" + # ) + filepath = ( + "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 " + "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg" + ) + + with extract_msg.Message(filepath) as msg: + sender = msg.sender + recipients = msg.to + subject = msg.subject + body = msg.body + # If the msg has attachments, they can be extracted as well + attachments = msg.attachments + + from pprint import pprint + pprint(parse_msg_body(body)) + + # We manually create the non-invasive notes for the pilot + non_invasive_notes = [ + { + 'address': '5 Oaklands', + 'postcode': 'B62 0JA', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation. ' + 'There is a shared alleyway with the neighbour, that is a solid brick wall.', + 'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window', + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Front house direction: North East, Back house direction: South West', + 'Access to mains?': 'Property has access to the mains', + }, + { + 'address': '6 Beech Road', + 'postcode': 'DY1 4BP', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, ' + 'retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + 'Wall Render': None, + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Side house direction: North East', + 'Access to mains?': 'Property has access to the mains', + }, + { + 'address': '8 Corporation Road', + 'postcode': 'DY2 7PX', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': "External wall insulation", + 'Wall Render': "Render finish throughout", + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Front house direction: North East, Back house direction: South West', + 'Access to mains?': None, + }, + { + + 'address': '21 Wells Road', + 'postcode': 'DY5 3TB', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + 'Wall Render': None, + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Front house direction: East, Back house direction: West', + 'Access to mains?': 'Property has access to the mains', + }, + { + 'address': '47 Fairfield Road', + 'postcode': 'DY8 5UJ', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + 'Wall Render': None, + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Front house direction: East, Back house direction: West', + 'Access to mains?': 'Property has access to the mains', + }, + { + 'address': None, + 'postcode': None, + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': None, + 'Wall Render': None, + 'Existing solar PV': None, + 'Orientation': None, + 'Access to mains?': None, + }, + ] diff --git a/etl/customers/immo/pilot/requirements.txt b/etl/customers/immo/pilot/requirements.txt new file mode 100644 index 00000000..4673ab35 --- /dev/null +++ b/etl/customers/immo/pilot/requirements.txt @@ -0,0 +1 @@ +extract-msg From a158f2353c0f84bb005924441166ef56a899f59c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Apr 2024 15:36:58 +0100 Subject: [PATCH 217/248] manually created non-invasie notes --- etl/customers/immo/pilot/non_invasive.py | 63 ++++++++++++++++++++---- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py index cb978059..c2b8ea64 100644 --- a/etl/customers/immo/pilot/non_invasive.py +++ b/etl/customers/immo/pilot/non_invasive.py @@ -43,9 +43,17 @@ def app(): # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/" # "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg" # ) + # filepath = ( + # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 " + # "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg" + # ) + # filepath = ( + # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 " + # "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg" + # ) filepath = ( - "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 " - "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg" + "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 " + "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg" ) with extract_msg.Message(filepath) as msg: @@ -119,13 +127,50 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { - 'address': None, - 'postcode': None, + 'address': '53 Bromley', + 'postcode': 'DY5 4PJ', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', - 'Wall Insulation': None, - 'Wall Render': None, - 'Existing solar PV': None, - 'Orientation': None, - 'Access to mains?': None, + 'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - " + "recommends a cavity wall fill", + "Roof": "Hipped roof", + 'Existing solar PV': 'No existing solar', + 'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West", + 'Access to mains?': 'Property has access to the mains', + }, + { + 'address': '91 Osprey Drive', + 'postcode': 'DY1 2JS', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + 'Wall Render': 'Tile hung front and rear of property', + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Side house direction: East', + 'Access to mains?': 'Property has access to the mains', + }, + { + 'address': '150 Huntingtree Road', + 'postcode': 'B63 4HP', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Heating': 'Electric (storage heaters)', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + "Roof": "Hipped roof", + 'Existing solar PV': 'No existing solar', + 'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: " + "North East", + }, + { + 'address': '195 Ashenhurst Road', + 'postcode': 'DY1 2JB', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + 'Wall Render': "Solid render front and rear of property", + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Front house direction: South, Back house direction: North', + 'Access to mains?': 'Property has access to the mains', }, ] + + # TODO: Push the non-invasive results straight to the database from here From 485c01cbd69cf8b562b2d53da0ae03915edf8d93 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Apr 2024 16:14:01 +0100 Subject: [PATCH 218/248] Added uprns to non-invaive notes --- etl/customers/immo/pilot/non_invasive.py | 35 ++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py index c2b8ea64..0a376388 100644 --- a/etl/customers/immo/pilot/non_invasive.py +++ b/etl/customers/immo/pilot/non_invasive.py @@ -51,18 +51,17 @@ def app(): # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 " # "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg" # ) + # filepath = ( + # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 " + # "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg" + # ) filepath = ( - "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 " - "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg" + "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO " + "- 27 Milton Road Coseley Bilston WV14 8HZ.msg" ) with extract_msg.Message(filepath) as msg: - sender = msg.sender - recipients = msg.to - subject = msg.subject body = msg.body - # If the msg has attachments, they can be extracted as well - attachments = msg.attachments from pprint import pprint pprint(parse_msg_body(body)) @@ -70,6 +69,7 @@ def app(): # We manually create the non-invasive notes for the pilot non_invasive_notes = [ { + 'uprn': 90028499, 'address': '5 Oaklands', 'postcode': 'B62 0JA', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -82,6 +82,7 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { + 'uprn': 90055152, 'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -94,6 +95,7 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { + 'uprn': 90070461, 'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -104,7 +106,7 @@ def app(): 'Access to mains?': None, }, { - + 'uprn': 90022227, 'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -116,6 +118,7 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { + 'uprn': 90077535, 'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -127,6 +130,7 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { + 'uprn': 90060989, 'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -138,6 +142,7 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { + 'uprn': 90048026, 'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -149,6 +154,7 @@ def app(): 'Access to mains?': 'Property has access to the mains', }, { + 'uprn': 90093693, 'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -161,6 +167,7 @@ def app(): "North East", }, { + 'uprn': 90051858, 'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', @@ -171,6 +178,18 @@ def app(): 'Orientation': 'Front house direction: South, Back house direction: North', 'Access to mains?': 'Property has access to the mains', }, + { + 'uprn': 90106884, + 'address': '27 Milton Road', + 'postcode': 'WV14 8HZ', + 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' + 'CIGA check and extracting the cavity, replacing with bead insulation.', + 'Wall Render': "Solid render front and rear of property", + 'Existing solar PV': 'No existing solar', + 'Orientation': 'Front house direction: South East, Back house direction: North West', + 'Access to mains?': 'Property has access to the mains', + }, ] # TODO: Push the non-invasive results straight to the database from here From 65f83930d56290fc73846ca4c8626ac46e3cd7c6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Apr 2024 16:25:12 +0100 Subject: [PATCH 219/248] added is_override to storage of recommendation --- .../db/functions/recommendations_functions.py | 3 ++- .../app/db/models/non_intrusive_surveys.py | 24 +++++++++++++++++++ backend/app/db/models/recommendations.py | 1 + 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 backend/app/db/models/non_intrusive_surveys.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 1426e339..43daec77 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -85,7 +85,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "co2_equivalent_savings": rec["co2_equivalent_savings"], "total_work_hours": rec["labour_hours"], "energy_cost_savings": rec["energy_cost_savings"], - "labour_days": rec["labour_days"] + "labour_days": rec["labour_days"], + "is_override": rec["is_override"], } for rec in recommendations_to_upload ] diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py new file mode 100644 index 00000000..c5f3734a --- /dev/null +++ b/backend/app/db/models/non_intrusive_surveys.py @@ -0,0 +1,24 @@ +from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum, Integer +from sqlalchemy.orm import declarative_base +from sqlalchemy.sql import func +from backend.app.db.models.portfolio import Portfolio, PropertyModel +from backend.app.db.models.materials import Material +from datatypes.enums import QuantityUnits + +Base = declarative_base() + + +class NonIntrusiveSurvey(Base): + __tablename__ = 'non_intrusive_survey' + + id = Column(BigInteger, primary_key=True, autoincrement=True) + uprn = Column(Integer, nullable=False) + survey_date = Column(TIMESTAMP, nullable=False) + surveyor = Column(String, nullable=False) + + +class NonIntrusiveSurveyNotes(Base): + id = Column(BigInteger, primary_key=True, autoincrement=True) + survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False) + title = Column(String, nullable=False) + note = Column(String, nullable=False) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index a492f2f2..be5ff30c 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -30,6 +30,7 @@ class Recommendation(Base): rental_yield_increase = Column(Float) total_work_hours = Column(Float) labour_days = Column(Float) + is_override = Column(Boolean, nullable=False, default=False) class RecommendationMaterials(Base): From aaa279463eea2505b3d36ee46c26b33b17955e77 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Apr 2024 16:37:28 +0100 Subject: [PATCH 220/248] Added is_override to heating controls --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- recommendations/HeatingControlRecommender.py | 46 ++++++++++++++------ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 76eaba4f..63218163 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -1,5 +1,5 @@ from recommendations.Costs import Costs -from recommendations.recommendation_utils import check_simulation_difference +from recommendations.recommendation_utils import check_simulation_difference, override_costs from backend.Property import Property from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes @@ -159,20 +159,30 @@ class HeatingControlRecommender: has_room_thermostat = not needs_room_thermostat has_trvs = not needs_trvs + cost_result = self.costs.roomstat_programmer_trvs( + number_heated_rooms=int(self.property.data["number-heated-rooms"]), + has_programmer=has_programmer, + has_room_thermostat=has_room_thermostat, + has_trvs=has_trvs + ) + + description = "upgrade heating controls to Room thermostat, programmer and TRVs" + + is_override = "heating_control" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + description = "Heating controls have already been upgraded, no further action needed." + self.recommendation.append( { "type": "heating_control", "parts": [], - "description": "upgrade heating controls to Room thermostat, programmer and TRVs", - **self.costs.roomstat_programmer_trvs( - number_heated_rooms=int(self.property.data["number-heated-rooms"]), - has_programmer=has_programmer, - has_room_thermostat=has_room_thermostat, - has_trvs=has_trvs - ), + "description": description, + **cost_result, "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, "simulation_config": simulation_config } ) @@ -211,18 +221,28 @@ class HeatingControlRecommender: if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]: simulation_config["mainheatc_energy_eff_ending"] = "Very Good" + cost_result = self.costs.time_and_temperature_zone_control( + number_heated_rooms=int(self.property.data["number-heated-rooms"]) + ) + + description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & " + "temperature zone control)") + + is_override = "heating_control" in self.property.override + if is_override: + cost_result = override_costs(cost_result) + description = "Heating controls have already been upgraded, no further action needed." + self.recommendation.append( { "type": "heating_control", "parts": [], - "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves " - "(time & temperature zone control)", - **self.costs.time_and_temperature_zone_control( - number_heated_rooms=int(self.property.data["number-heated-rooms"]) - ), + "description": description, + **cost_result, "starting_u_value": None, "new_u_value": None, "sap_points": None, + "is_override": is_override, "simulation_config": simulation_config } ) From 527291b4395eb8b5563f52fd8449faee569d6789 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Apr 2024 16:40:13 +0100 Subject: [PATCH 221/248] Added is_override to mechanical ventilation recommendation --- recommendations/VentilationRecommendations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index 07f7cf1e..7ffcda08 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -69,6 +69,7 @@ class VentilationRecommendations(Definitions): "description": f"Install {n_units} {part[0]['description']} units", "starting_u_value": None, "new_u_value": None, + "is_override": is_override, "sap_points": 0, "heat_demand": 0, "adjusted_heat_demand": 0, From 34d6a075289b0c2d31d75a1bad8ea5c969f12fca Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Apr 2024 17:07:42 +0100 Subject: [PATCH 222/248] Pushed non-intrusive survey results to bd --- .../app/db/functions/non_intrusive_surveys.py | 50 ++++++++++ .../app/db/models/non_intrusive_surveys.py | 8 +- etl/customers/immo/pilot/non_invasive.py | 99 +++++++++++-------- 3 files changed, 110 insertions(+), 47 deletions(-) create mode 100644 backend/app/db/functions/non_intrusive_surveys.py diff --git a/backend/app/db/functions/non_intrusive_surveys.py b/backend/app/db/functions/non_intrusive_surveys.py new file mode 100644 index 00000000..93348121 --- /dev/null +++ b/backend/app/db/functions/non_intrusive_surveys.py @@ -0,0 +1,50 @@ +from sqlalchemy.orm import Session +from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes + + +def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500): + """ + Uploads a list of non-intrusive survey notes into the database in batches. Each dictionary in the list represents + one survey and its associated notes. + + :param session: SQLAlchemy Session object through which all database transactions are handled. + :param non_invasive_notes: List of dictionaries where each dictionary contains survey details including 'uprn', + 'survey_date', 'surveyor', and other notes as key-value pairs. + :param batch_size: The size of each batch to be processed (default is 500). + :return: None + """ + + # Helper function to process each batch + def process_batch(batch): + surveys = [] + notes = [] + + for note in batch: + survey = NonIntrusiveSurvey( + uprn=note['uprn'], + survey_date=note['survey_date'], + surveyor=note['surveyor'] + ) + surveys.append(survey) + + session.add_all(surveys) + session.flush() # Get IDs for surveys + + for note, survey in zip(batch, surveys): + for key, value in note.items(): + if key not in ['uprn', 'survey_date', 'surveyor']: + notes.append(NonIntrusiveSurveyNotes( + survey_id=survey.id, + title=key, + note=value + )) + + session.bulk_save_objects(notes) + session.commit() + + # Split the data into batches and process each batch + total = len(non_invasive_notes) + for start in range(0, total, batch_size): + end = min(start + batch_size, total) + batch = non_invasive_notes[start:end] + process_batch(batch) diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py index c5f3734a..bc2d8adc 100644 --- a/backend/app/db/models/non_intrusive_surveys.py +++ b/backend/app/db/models/non_intrusive_surveys.py @@ -1,9 +1,5 @@ -from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum, Integer +from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer from sqlalchemy.orm import declarative_base -from sqlalchemy.sql import func -from backend.app.db.models.portfolio import Portfolio, PropertyModel -from backend.app.db.models.materials import Material -from datatypes.enums import QuantityUnits Base = declarative_base() @@ -18,6 +14,8 @@ class NonIntrusiveSurvey(Base): class NonIntrusiveSurveyNotes(Base): + __tablename__ = 'non_intrusive_survey_notes' + id = Column(BigInteger, primary_key=True, autoincrement=True) survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False) title = Column(String, nullable=False) diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py index 0a376388..6dc22c62 100644 --- a/etl/customers/immo/pilot/non_invasive.py +++ b/etl/customers/immo/pilot/non_invasive.py @@ -1,4 +1,8 @@ -import extract_msg +# import extract_msg +from datetime import datetime +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine +from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes def parse_msg_body(text): @@ -55,24 +59,25 @@ def app(): # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 " # "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg" # ) - filepath = ( - "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO " - "- 27 Milton Road Coseley Bilston WV14 8HZ.msg" - ) - - with extract_msg.Message(filepath) as msg: - body = msg.body - - from pprint import pprint - pprint(parse_msg_body(body)) + # filepath = ( + # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO " + # "- 27 Milton Road Coseley Bilston WV14 8HZ.msg" + # ) + # + # with extract_msg.Message(filepath) as msg: + # body = msg.body + # + # from pprint import pprint + # pprint(parse_msg_body(body)) # We manually create the non-invasive notes for the pilot non_invasive_notes = [ { 'uprn': 90028499, - 'address': '5 Oaklands', - 'postcode': 'B62 0JA', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '5 Oaklands', + # 'postcode': 'B62 0JA', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation. ' 'There is a shared alleyway with the neighbour, that is a solid brick wall.', @@ -83,9 +88,10 @@ def app(): }, { 'uprn': 90055152, - 'address': '6 Beech Road', - 'postcode': 'DY1 4BP', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '6 Beech Road', + # 'postcode': 'DY1 4BP', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, ' 'retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', @@ -96,9 +102,10 @@ def app(): }, { 'uprn': 90070461, - 'address': '8 Corporation Road', - 'postcode': 'DY2 7PX', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '8 Corporation Road', + # 'postcode': 'DY2 7PX', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': "External wall insulation", 'Wall Render': "Render finish throughout", 'Existing solar PV': 'No existing solar', @@ -107,9 +114,10 @@ def app(): }, { 'uprn': 90022227, - 'address': '21 Wells Road', - 'postcode': 'DY5 3TB', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '21 Wells Road', + # 'postcode': 'DY5 3TB', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', 'Wall Render': None, @@ -119,9 +127,10 @@ def app(): }, { 'uprn': 90077535, - 'address': '47 Fairfield Road', - 'postcode': 'DY8 5UJ', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '47 Fairfield Road', + # 'postcode': 'DY8 5UJ', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', 'Wall Render': None, @@ -131,9 +140,10 @@ def app(): }, { 'uprn': 90060989, - 'address': '53 Bromley', - 'postcode': 'DY5 4PJ', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '53 Bromley', + # 'postcode': 'DY5 4PJ', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - " "recommends a cavity wall fill", "Roof": "Hipped roof", @@ -143,9 +153,10 @@ def app(): }, { 'uprn': 90048026, - 'address': '91 Osprey Drive', - 'postcode': 'DY1 2JS', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '91 Osprey Drive', + # 'postcode': 'DY1 2JS', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', 'Wall Render': 'Tile hung front and rear of property', @@ -155,9 +166,10 @@ def app(): }, { 'uprn': 90093693, - 'address': '150 Huntingtree Road', - 'postcode': 'B63 4HP', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '150 Huntingtree Road', + # 'postcode': 'B63 4HP', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Heating': 'Electric (storage heaters)', 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', @@ -168,9 +180,10 @@ def app(): }, { 'uprn': 90051858, - 'address': '195 Ashenhurst Road', - 'postcode': 'DY1 2JB', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '195 Ashenhurst Road', + # 'postcode': 'DY1 2JB', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', 'Wall Render': "Solid render front and rear of property", @@ -180,9 +193,10 @@ def app(): }, { 'uprn': 90106884, - 'address': '27 Milton Road', - 'postcode': 'WV14 8HZ', - 'Surveyor': 'Carl Fitzgerald - The Warmfront Team', + # 'address': '27 Milton Road', + # 'postcode': 'WV14 8HZ', + 'surveyor': 'Carl Fitzgerald - The Warmfront Team', + 'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'), 'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a ' 'CIGA check and extracting the cavity, replacing with bead insulation.', 'Wall Render': "Solid render front and rear of property", @@ -192,4 +206,5 @@ def app(): }, ] - # TODO: Push the non-invasive results straight to the database from here + session = sessionmaker(bind=db_engine)() + upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500) From 954fa9d32c5d30bd63098b74512b006b47bf3056 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Apr 2024 14:57:13 +0100 Subject: [PATCH 223/248] changed is_override to already_installed in recommendations --- backend/Property.py | 4 ++-- backend/app/plan/router.py | 17 +++++++++-------- backend/app/plan/schemas.py | 2 +- recommendations/FireplaceRecommendations.py | 6 +++--- recommendations/FloorRecommendations.py | 10 +++++----- recommendations/HeatingControlRecommender.py | 12 ++++++------ recommendations/HeatingRecommender.py | 12 ++++++------ recommendations/HotwaterRecommendations.py | 6 +++--- recommendations/LightingRecommendations.py | 6 +++--- recommendations/RoofRecommendations.py | 10 +++++----- recommendations/SecondaryHeating.py | 6 +++--- recommendations/SolarPvRecommendations.py | 6 +++--- recommendations/VentilationRecommendations.py | 10 +++++----- recommendations/WallRecommendations.py | 16 ++++++++-------- recommendations/WindowsRecommendations.py | 6 +++--- 15 files changed, 65 insertions(+), 64 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 2892b86e..a8ed9129 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -61,7 +61,7 @@ class Property: n_bedrooms = None def __init__( - self, id, postcode, address, epc_record, override=None, **kwargs + self, id, postcode, address, epc_record, already_installed=None, **kwargs ): self.epc_record = epc_record @@ -79,7 +79,7 @@ class Property: # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the # cost and instead, provide a message that the measure has already been installed - self.override = ast.literal_eval(override['overrides']) if override else [] + self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] self.uprn = epc_record.get("uprn") self.full_sap_epc = epc_record.get("full_sap_epc") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 08ce0dcc..49e14872 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -74,7 +74,7 @@ async def trigger_plan(body: PlanTriggerRequest): # TODO: We should store the trigger file path in the database with the plan so we can track the file that # triggered the plan - # TODO: Create the ability to congigure/switch off certain measures + # TODO: if the measure is already installed, it should actually be the very first phase try: session.begin() @@ -85,10 +85,10 @@ async def trigger_plan(body: PlanTriggerRequest): if body.patches_file_path: patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path) - overrides = [] - if body.overrides_file_path: - overrides = read_csv_from_s3( - bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.overrides_file_path + already_installed = [] + if body.already_installed_file_path: + already_installed = read_csv_from_s3( + bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path ) cleaning_data = read_dataframe_from_s3_parquet( @@ -142,8 +142,9 @@ async def trigger_plan(body: PlanTriggerRequest): cleaning_data=cleaning_data ) - override = next(( - x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + property_already_installed = next(( + x for x in already_installed if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) ), {}) input_properties.append( Property( @@ -151,7 +152,7 @@ async def trigger_plan(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - override=override, + already_installed=property_already_installed, **Property.extract_kwargs(config) ) ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index ec49e41e..76eb49d2 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -9,7 +9,7 @@ class PlanTriggerRequest(BaseModel): goal_value: str portfolio_id: int trigger_file_path: str - overrides_file_path: Optional[str] = None + already_installed_file_path: Optional[str] = None patches_file_path: Optional[str] = None exclusions: Optional[conlist(str, min_items=1)] = None diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py index c1114f31..601a8eb0 100644 --- a/recommendations/FireplaceRecommendations.py +++ b/recommendations/FireplaceRecommendations.py @@ -32,8 +32,8 @@ class FireplaceRecommendations(Definitions): if number_open_fireplaces == 0: return - is_override = "sealing_open_fireplace" in self.property.override - estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not is_override else 0 + already_installed = "sealing_open_fireplace" in self.property.already_installed + estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0 # We recommend installing two mechanical ventilation systems self.recommendation = [ @@ -45,7 +45,7 @@ class FireplaceRecommendations(Definitions): "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, "total": estimated_cost, # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal "labour_hours": 6 * number_open_fireplaces, diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index b7bd370c..3f764d83 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -193,8 +193,8 @@ class FloorRecommendations(Definitions): non_insulation_materials=non_insulation_materials ) - is_override = "suspended_floor_insulation" in self.property.override - if is_override: + already_installed = "suspended_floor_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) elif material["type"] == "solid_floor_insulation": @@ -204,8 +204,8 @@ class FloorRecommendations(Definitions): non_insulation_materials=non_insulation_materials ) - is_override = "solid_floor_insulation" in self.property.override - if is_override: + already_installed = "solid_floor_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) else: raise NotImplementedError("Implement me!") @@ -226,7 +226,7 @@ class FloorRecommendations(Definitions): "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **cost_result } ) diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 63218163..d24ad811 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -168,8 +168,8 @@ class HeatingControlRecommender: description = "upgrade heating controls to Room thermostat, programmer and TRVs" - is_override = "heating_control" in self.property.override - if is_override: + already_installed = "heating_control" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) description = "Heating controls have already been upgraded, no further action needed." @@ -182,7 +182,7 @@ class HeatingControlRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, "simulation_config": simulation_config } ) @@ -228,8 +228,8 @@ class HeatingControlRecommender: description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & " "temperature zone control)") - is_override = "heating_control" in self.property.override - if is_override: + already_installed = "heating_control" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) description = "Heating controls have already been upgraded, no further action needed." @@ -242,7 +242,7 @@ class HeatingControlRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, "simulation_config": simulation_config } ) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index d83b755e..432dc6a6 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -144,8 +144,8 @@ class HeatingRecommender: recommendation_description = f"{description} and {controls_description}" - is_override = "cavity_wall_insulation" in self.property.override - if is_override: + already_installed = "cavity_wall_insulation" in self.property.already_installed + if already_installed: total_costs = override_costs(total_costs) recommendation_description = "Heating system has already been upgraded, no further action needed." @@ -159,7 +159,7 @@ class HeatingRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **total_costs, "simulation_config": recommendation_simulation_config } @@ -371,8 +371,8 @@ class HeatingRecommender: n_rooms=self.property.number_of_rooms ) - is_override = "heating" in self.property.override - if is_override: + already_installed = "heating" in self.property.already_installed + if already_installed: boiler_costs = override_costs(boiler_costs) description = "Heating system has already been upgraded, no further action needed." @@ -386,7 +386,7 @@ class HeatingRecommender: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, "simulation_config": simulation_config, **boiler_costs } diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py index 88cfa932..9c5c7045 100644 --- a/recommendations/HotwaterRecommendations.py +++ b/recommendations/HotwaterRecommendations.py @@ -42,8 +42,8 @@ class HotwaterRecommendations: recommendation_cost = self.costs.hot_water_tank_insulation() - is_override = "hot_water_tank_insulation" in self.property.override - if is_override: + already_installed = "hot_water_tank_insulation" in self.property.already_installed + if already_installed: recommendation_cost = override_costs(recommendation_cost) description = "Insulation tank has already been insulated, no further action required" else: @@ -60,7 +60,7 @@ class HotwaterRecommendations: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **recommendation_cost, "simulation_config": {"hot_water_energy_eff_ending": "Average"} } diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py index 9e4c8e43..31720579 100644 --- a/recommendations/LightingRecommendations.py +++ b/recommendations/LightingRecommendations.py @@ -92,8 +92,8 @@ class LightingRecommendations: heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets) - is_override = "low_energy_lighting" in self.property.override - if is_override: + already_installed = "low_energy_lighting" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) description = "Low energy lighting has already been installed, no further action required" @@ -105,7 +105,7 @@ class LightingRecommendations: "description": description, "starting_u_value": None, "new_u_value": None, - "is_override": is_override, + "already_installed": already_installed, # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to # the proportion of lights that will be set to low energy "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2), diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 5ba7e82e..dc5ee7db 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -207,8 +207,8 @@ class RoofRecommendations: floor_area=self.property.insulation_floor_area, material=material ) - is_override = "loft_insulation" in self.property.override - if is_override: + already_installed = "loft_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) elif material["type"] == "flat_roof_insulation": cost_result = self.costs.flat_roof_insulation( @@ -216,8 +216,8 @@ class RoofRecommendations: material=material, non_insulation_materials=non_insulation_materials ) - is_override = "flat_roof_insulation" in self.property.override - if is_override: + already_installed = "flat_roof_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) else: raise ValueError("Invalid material type") @@ -238,7 +238,7 @@ class RoofRecommendations: "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **cost_result } ) diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py index e426977e..5d763510 100644 --- a/recommendations/SecondaryHeating.py +++ b/recommendations/SecondaryHeating.py @@ -40,8 +40,8 @@ class SecondaryHeating: costs = self.costs.heater_removal(n_rooms=n_rooms) - is_override = "secondary_heating" in self.property.override - if is_override: + already_installed = "secondary_heating" in self.property.already_installed + if already_installed: costs = override_costs(costs) description = "Secondary heating system has already been removed, no further action required" else: @@ -56,7 +56,7 @@ class SecondaryHeating: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **costs, "simulation_config": { "secondheat_description_ending": "None" diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 72fcdf4b..58cf9735 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -111,8 +111,8 @@ class SolarPvRecommendations: description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p" f"anel system on {round(roof_coverage_percent)}% the roof.") - is_override = "solar_pv" in self.property.override - if is_override: + already_installed = "solar_pv" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) self.recommendation.append( @@ -124,7 +124,7 @@ class SolarPvRecommendations: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **cost_result, # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale # back up here diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index 7ffcda08..5b36bd9c 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -50,11 +50,11 @@ class VentilationRecommendations(Definitions): part = self.materials.copy() - is_override = "cavity_wall_insulation" in self.property.override + already_installed = "cavity_wall_insulation" in self.property.already_installed - estimated_cost = n_units * part[0]["cost"] if not is_override else 0 - labour_hours = 4 * n_units if not is_override else 0 - labour_days = 4 * n_units / 8.0 if not is_override else 0 + estimated_cost = n_units * part[0]["cost"] if not already_installed else 0 + labour_hours = 4 * n_units if not already_installed else 0 + labour_days = 4 * n_units / 8.0 if not already_installed else 0 part[0]["total"] = estimated_cost part[0]["quantity"] = n_units @@ -69,7 +69,7 @@ class VentilationRecommendations(Definitions): "description": f"Install {n_units} {part[0]['description']} units", "starting_u_value": None, "new_u_value": None, - "is_override": is_override, + "already_installed": already_installed, "sap_points": 0, "heat_demand": 0, "adjusted_heat_demand": 0, diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 471a62cb..feb2620b 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -221,8 +221,8 @@ class WallRecommendations(Definitions): material=material.to_dict(), ) - is_override = "cavity_wall_insulation" in self.property.override - if is_override: + already_installed = "cavity_wall_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) recommendations.append( @@ -241,7 +241,7 @@ class WallRecommendations(Definitions): "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **cost_result } ) @@ -282,8 +282,8 @@ class WallRecommendations(Definitions): material=material.to_dict(), non_insulation_materials=non_insulation_materials ) - is_override = "internal_wall_insulation" in self.property.override - if is_override: + already_installed = "internal_wall_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) elif material["type"] == "external_wall_insulation": @@ -292,8 +292,8 @@ class WallRecommendations(Definitions): material=material.to_dict(), non_insulation_materials=non_insulation_materials ) - is_override = "external_wall_insulation" in self.property.override - if is_override: + already_installed = "external_wall_insulation" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) else: raise ValueError("Invalid material type") @@ -313,7 +313,7 @@ class WallRecommendations(Definitions): "description": self._make_description(material), "starting_u_value": u_value, "new_u_value": new_u_value, - "is_override": is_override, + "already_installed": already_installed, "sap_points": None, **cost_result } diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index b2fe20a6..b7c2823a 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -71,8 +71,8 @@ class WindowsRecommendations: is_secondary_glazing=is_secondary_glazing ) - is_override = "windows_glazing" in self.property.override - if is_override: + already_installed = "windows_glazing" in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) description = "The property already has double glazing installed. No further action is required." else: @@ -98,7 +98,7 @@ class WindowsRecommendations: "starting_u_value": None, "new_u_value": None, "sap_points": None, - "is_override": is_override, + "already_installed": already_installed, **cost_result, "is_secondary_glazing": is_secondary_glazing } From c58389a26695d863d003a4cf2c9f26515f9898ea Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Apr 2024 14:57:46 +0100 Subject: [PATCH 224/248] updated push to db --- backend/app/db/functions/recommendations_functions.py | 2 +- backend/app/db/models/recommendations.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 43daec77..b22ce92f 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -86,7 +86,7 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "total_work_hours": rec["labour_hours"], "energy_cost_savings": rec["energy_cost_savings"], "labour_days": rec["labour_days"], - "is_override": rec["is_override"], + "already_installed": rec["already_installed"], } for rec in recommendations_to_upload ] diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index be5ff30c..186f87a8 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -30,7 +30,7 @@ class Recommendation(Base): rental_yield_increase = Column(Float) total_work_hours = Column(Float) labour_days = Column(Float) - is_override = Column(Boolean, nullable=False, default=False) + already_installed = Column(Boolean, nullable=False, default=False) class RecommendationMaterials(Base): From f1e3bca9bff0c68ba9ce068c91a91268da794cb0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Apr 2024 14:59:30 +0100 Subject: [PATCH 225/248] updated asset list for immo to reference already installed --- etl/customers/immo/pilot/asset_list.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 07ebe884..d8839924 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -48,7 +48,7 @@ patches = [ # This is information that is found as a result of the non-invasives, that mean that certain measures # have been installed already. To reflect this in the front end, it is included in the recommendation, however # the cost is removed and instead, a message is presented saying that the measure is already installed. -overrides = [ +already_installed = [ { 'address': '5 Oaklands', 'postcode': 'B62 0JA', @@ -87,11 +87,11 @@ def app(): ) # Store overrides in s3 - overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json" + already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json" save_csv_to_s3( - dataframe=pd.DataFrame(overrides), + dataframe=pd.DataFrame(already_installed), bucket_name="retrofit-plan-inputs-dev", - file_name=overrides_filename + file_name=already_installed_filename ) # Store patches in s3 @@ -109,7 +109,7 @@ def app(): "goal": "Increase EPC", "goal_value": "C", "trigger_file_path": filename, - "overrides_file_path": overrides_filename, + "already_installed_file_path": already_installed_filename, "patches_file_path": patches_filename, "budget": None, } @@ -122,7 +122,7 @@ def app(): "goal": "Increase EPC", "goal_value": "B", "trigger_file_path": filename, - "overrides_file_path": overrides_filename, + "already_installed_file_path": already_installed_filename, "patches_file_path": patches_filename, "budget": None, } From 046ac3dc39bc7c478a91fcaa58bddc30508c5166 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Apr 2024 15:05:31 +0100 Subject: [PATCH 226/248] fixed bug in already installed --- etl/customers/immo/pilot/asset_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index d8839924..e587cc25 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -52,7 +52,7 @@ already_installed = [ { 'address': '5 Oaklands', 'postcode': 'B62 0JA', - "overrides": ["windows_glazing"] + "already_installed": ["windows_glazing"] } ] From 56bf3c121fbc0d4bb31a5e1b073b80daac7dba51 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 13:31:46 +0100 Subject: [PATCH 227/248] Adding cdn to terraform --- infrastructure/terraform/main.tf | 9 +++ .../terraform/modules/cloudfront/main.tf | 65 +++++++++++++++++++ .../terraform/modules/cloudfront/variables.tf | 9 +++ 3 files changed, 83 insertions(+) create mode 100644 infrastructure/terraform/modules/cloudfront/main.tf create mode 100644 infrastructure/terraform/modules/cloudfront/variables.tf diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index d545cdf8..1d0562dd 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -181,4 +181,13 @@ module "lambda_carbon_prediction_ecr" { module "lambda_heat_prediction_ecr" { ecr_name = "lambda-heat-prediction-${var.stage}" source = "./modules/ecr" +} + +############################################## +# CDN - Cloudfront +############################################## +module "cloudfront_distribution" { + source = "./modules/cloudfront" + bucket_name = module.s3.bucket_name + stage = var.stage } \ No newline at end of file diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf new file mode 100644 index 00000000..fbb88160 --- /dev/null +++ b/infrastructure/terraform/modules/cloudfront/main.tf @@ -0,0 +1,65 @@ +resource "aws_cloudfront_distribution" "s3_distribution" { + origin { + domain_name = "${aws_s3_bucket.bucket.bucket_regional_domain_name}" + origin_id = "S3-${var.bucket_name}" + + s3_origin_config { + origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path + } + } + + enabled = true + + default_cache_behavior { + allowed_methods = ["GET", "HEAD"] + cached_methods = ["GET", "HEAD"] + target_origin_id = "S3-${var.bucket_name}" + viewer_protocol_policy = "redirect-to-https" + compress = true + + forwarded_values { + query_string = false + cookies { + forward = "none" + } + } + + min_ttl = 0 + default_ttl = 86400 + max_ttl = 31536000 + } + + price_class = "PriceClass_All" + + restrictions { + geo_restriction { + restriction_type = "none" + } + } + + viewer_certificate { + cloudfront_default_certificate = true + } +} + +resource "aws_cloudfront_origin_access_identity" "oai" { + comment = "OAI for ${var.bucket_name}" +} + +resource "aws_s3_bucket_policy" "bucket_policy" { + bucket = aws_s3_bucket.bucket.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Principal = { + AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}" + } + Action = "s3:GetObject" + Resource = "${aws_s3_bucket.bucket.arn}/*" + }, + ] + }) +} diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf new file mode 100644 index 00000000..433edc24 --- /dev/null +++ b/infrastructure/terraform/modules/cloudfront/variables.tf @@ -0,0 +1,9 @@ +variable "bucket_name" { + description = "The name of the bucket" + type = string +} + +variable "stage" { + description = "The deployment stage" + type = string +} From ce546b56f7db4a88d82ee3f72148d2b4fe64f1c2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 13:38:23 +0100 Subject: [PATCH 228/248] passing additional data to cloudfront distribution --- infrastructure/terraform/main.tf | 9 ++++++--- .../terraform/modules/cloudfront/main.tf | 6 +++--- .../terraform/modules/cloudfront/variables.tf | 15 +++++++++++++++ infrastructure/terraform/modules/s3/outputs.tf | 12 ++++++++++++ 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 1d0562dd..fde25487 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -187,7 +187,10 @@ module "lambda_heat_prediction_ecr" { # CDN - Cloudfront ############################################## module "cloudfront_distribution" { - source = "./modules/cloudfront" - bucket_name = module.s3.bucket_name - stage = var.stage + source = "./modules/cloudfront" + bucket_name = module.s3.bucket_name + bucket_id = module.s3.bucket_id + bucket_arn = module.s3.bucket_arn + bucket_domain_name = module.s3.bucket_domain_name + stage = var.stage } \ No newline at end of file diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf index fbb88160..281ff09f 100644 --- a/infrastructure/terraform/modules/cloudfront/main.tf +++ b/infrastructure/terraform/modules/cloudfront/main.tf @@ -1,6 +1,6 @@ resource "aws_cloudfront_distribution" "s3_distribution" { origin { - domain_name = "${aws_s3_bucket.bucket.bucket_regional_domain_name}" + domain_name = var.bucket_domain_name origin_id = "S3-${var.bucket_name}" s3_origin_config { @@ -47,7 +47,7 @@ resource "aws_cloudfront_origin_access_identity" "oai" { } resource "aws_s3_bucket_policy" "bucket_policy" { - bucket = aws_s3_bucket.bucket.id + bucket = var.bucket_id policy = jsonencode({ Version = "2012-10-17" @@ -58,7 +58,7 @@ resource "aws_s3_bucket_policy" "bucket_policy" { AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}" } Action = "s3:GetObject" - Resource = "${aws_s3_bucket.bucket.arn}/*" + Resource = "${var.bucket_arn}/*" }, ] }) diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf index 433edc24..88f770a8 100644 --- a/infrastructure/terraform/modules/cloudfront/variables.tf +++ b/infrastructure/terraform/modules/cloudfront/variables.tf @@ -7,3 +7,18 @@ variable "stage" { description = "The deployment stage" type = string } + +variable "bucket_id" { + description = "The ID of the S3 bucket" + type = string +} + +variable "bucket_arn" { + description = "The ARN of the S3 bucket" + type = string +} + +variable "bucket_domain_name" { + description = "The regional domain name of the S3 bucket" + type = string +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/s3/outputs.tf b/infrastructure/terraform/modules/s3/outputs.tf index a5e7ddb4..7668dbc4 100644 --- a/infrastructure/terraform/modules/s3/outputs.tf +++ b/infrastructure/terraform/modules/s3/outputs.tf @@ -2,3 +2,15 @@ output "bucket_name" { description = "The name of the S3 bucket" value = aws_s3_bucket.bucket.bucket } + +output "bucket_id" { + value = aws_s3_bucket.bucket.id +} + +output "bucket_arn" { + value = aws_s3_bucket.bucket.arn +} + +output "bucket_domain_name" { + value = aws_s3_bucket.bucket.bucket_regional_domain_name +} \ No newline at end of file From e6f9416c8e4b3452f42c47044503c4fdcd68b7cf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 14:05:36 +0100 Subject: [PATCH 229/248] upgrade db instance version --- infrastructure/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index fde25487..55266e10 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" { resource "aws_db_instance" "default" { allocated_storage = var.allocated_storage engine = "postgres" - engine_version = "14.7" + engine_version = "14.10" instance_class = var.instance_class db_name = var.database_name username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"] From 6076eb4f24905ad026c7a0dca9eb3d15f7678a5b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 14:50:00 +0100 Subject: [PATCH 230/248] building photo upload app --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/non_invasive_surveys/photos/README.md | 19 +++ etl/non_invasive_surveys/photos/app.py | 120 ++++++++++++++++++ .../photos/requirements.txt | 3 + 5 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 etl/non_invasive_surveys/photos/README.md create mode 100644 etl/non_invasive_surveys/photos/app.py create mode 100644 etl/non_invasive_surveys/photos/requirements.txt diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..c75af922 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1f2c584d 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_invasive_surveys/photos/README.md new file mode 100644 index 00000000..9dbe951f --- /dev/null +++ b/etl/non_invasive_surveys/photos/README.md @@ -0,0 +1,19 @@ +# Non Intrusive Surveys - photo upload + +This folder contains photos taken during non-intrusive surveys. Photos are stored in folders named after the survey ID. + +## Getting started + +Install the required packages by running the following command: + +```bash +pip install -r requirements.txt +``` + +## Usage + +The main application is found in the app.py file. To run the application, use the following command: + +```bash +python app.py +``` \ No newline at end of file diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py new file mode 100644 index 00000000..1b6790f9 --- /dev/null +++ b/etl/non_invasive_surveys/photos/app.py @@ -0,0 +1,120 @@ +import boto3 +from PIL import Image +from pathlib import Path +from dotenv import load_dotenv + +# Inputs +ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env" +PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data" +FOLDER_UPRN_LOOKUP = { + "91 Osprey Drive DY1 2JS": 90048026, + "195 Ashenhurst Rd DY1 2JB": 90051858, + "6 Beech Rd DY1 4BP": 90055152, + "53 Bromley DY5 4PJ": 90060989, + "5 Oaklands B62 0JA": 90028499, + "47 Fairfield Rd DY8 5UJ": 90077535, + "150 Huntingtree Rd B63 4HP": 90093693, + "27 Milton Rd DY1 2JB": 90106884, + "21 Wells Rd DY5 3TB": 90022227, + "8 Corporation Rd DY2 7PX": 90070461 +} + + +def list_subdirectories(directory_path): + """ + List all subdirectories within a given directory. + + :param directory_path: Path to the directory. + :return: A list of paths to the subdirectories. + """ + directory = Path(directory_path) + subdirectories = [subdir for subdir in directory.iterdir() if subdir.is_dir()] + return subdirectories + + +def list_files_in_directory(directory_path, file_extension=".jpg"): + """ + List all files with a specific extension within a given directory and its subdirectories. + + :param directory_path: Path to the directory to scan. + :param file_extension: File extension to filter by. + :return: A list of paths to the files. + """ + # Convert the directory path to a Path object if it's not already one + directory = Path(directory_path) if not isinstance(directory_path, Path) else directory_path + + # List all files of the specified type in the directory and subdirectories + file_list = [file for file in directory.rglob(f'*{file_extension}')] + + return file_list + + +def create_images(input_path): + # Load the image + with Image.open(input_path) as img: + # Create a thumbnail + thumbnail = img.copy() + thumbnail.thumbnail((128, 128), Image.ANTIALIAS) # Resize to 128x128 (or any desired size) + thumbnail.save('thumbnail.jpg') + + # Create a 1080p version + full_hd = img.copy() + full_hd.thumbnail((1920, 1080), Image.ANTIALIAS) # Resize to 1080p + full_hd.save('1080p.jpg') + + # Return paths to the processed images + return 'thumbnail.jpg', '1080p.jpg', input_path + + +def upload_to_s3(bucket_name, file_path, object_name): + s3_client = boto3.client('s3') + s3_client.upload_file(file_path, bucket_name, object_name) + print(f"Uploaded {object_name} to S3 bucket {bucket_name}") + + +def upload_photos_to_s3(bucket_name, photo_paths): + # Upload each photo + for path in photo_paths: + object_name = path.split('/')[-1] # Assuming the path format is folder/filename + upload_to_s3(bucket_name, path, object_name) + + +def generate_cdn_url(distribution_domain, object_name): + return f"https://{distribution_domain}/{object_name}" + + +def process_and_upload_images(input_image_path, bucket_name, distribution_domain): + # Create images + thumbnail, full_hd, original = create_images(input_image_path) + + # Upload images + upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original]) + + # Generate CDN links + cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]] + + return cdn_links + + +def app(): + """ + This application is tasked with uploading the photos, recorded during the non-invasive surveys, to s3 and the + database. + To begin with, this app will simply read the files from the local machine, however we will come up with a more + efficient way to do this in the future. + + :return: + """ + + # List all files in the directory using pathlib + property_directories = list_subdirectories(PHOTO_DIRECTORY) + + # For each property, we want to list all of the photos in the directory + for property_dir in property_directories: + photo_files = list_files_in_directory(property_dir) + + # We now want to convert each file, and upload it to s3 + for photo_filepath in photo_files: + process_and_upload_images( + photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com" + ) diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_invasive_surveys/photos/requirements.txt new file mode 100644 index 00000000..2199a0b4 --- /dev/null +++ b/etl/non_invasive_surveys/photos/requirements.txt @@ -0,0 +1,3 @@ +Pillow +boto3 +python-dotenv \ No newline at end of file From d3a175468330774214e4c7225157dd4481cb60cd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 15:20:38 +0100 Subject: [PATCH 231/248] modifying photo upload code --- etl/non_invasive_surveys/photos/app.py | 43 ++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py index 1b6790f9..ffd993a6 100644 --- a/etl/non_invasive_surveys/photos/app.py +++ b/etl/non_invasive_surveys/photos/app.py @@ -1,4 +1,5 @@ import boto3 +import os from PIL import Image from pathlib import Path from dotenv import load_dotenv @@ -19,6 +20,10 @@ FOLDER_UPRN_LOOKUP = { "8 Corporation Rd DY2 7PX": 90070461 } +load_dotenv(ENV_FILEPATH) +CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None) +CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None) + def list_subdirectories(directory_path): """ @@ -49,21 +54,33 @@ def list_files_in_directory(directory_path, file_extension=".jpg"): return file_list -def create_images(input_path): +def create_images(input_path, uprn): + # Need to create local directory if it doesn't exist + os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True) + # Load the image with Image.open(input_path) as img: + # Define output paths + thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg" + full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg" + webp_path = f"non_invasive_photos/{uprn}/webp.webp" # Save as WebP format + # Create a thumbnail thumbnail = img.copy() - thumbnail.thumbnail((128, 128), Image.ANTIALIAS) # Resize to 128x128 (or any desired size) - thumbnail.save('thumbnail.jpg') + thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS) # High-quality downsampling + thumbnail.save(thumbnail_path, 'JPEG', quality=85) # Save as JPEG with quality setting # Create a 1080p version full_hd = img.copy() - full_hd.thumbnail((1920, 1080), Image.ANTIALIAS) # Resize to 1080p - full_hd.save('1080p.jpg') + full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS) + full_hd.save(full_hd_path, 'JPEG', quality=90) # Slightly higher quality for larger image + + # Convert to WebP for better compression + webp = img.copy() + webp.save(webp_path, 'WEBP', quality=90) # Return paths to the processed images - return 'thumbnail.jpg', '1080p.jpg', input_path + return thumbnail_path, full_hd_path, webp_path def upload_to_s3(bucket_name, file_path, object_name): @@ -83,9 +100,9 @@ def generate_cdn_url(distribution_domain, object_name): return f"https://{distribution_domain}/{object_name}" -def process_and_upload_images(input_image_path, bucket_name, distribution_domain): +def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain): # Create images - thumbnail, full_hd, original = create_images(input_image_path) + thumbnail, full_hd, original = create_images(str(uprn), input_image_path) # Upload images upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original]) @@ -93,6 +110,10 @@ def process_and_upload_images(input_image_path, bucket_name, distribution_domain # Generate CDN links cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]] + # Delete local files + for path in [thumbnail, full_hd, original]: + os.remove(path) + return cdn_links @@ -112,9 +133,13 @@ def app(): # For each property, we want to list all of the photos in the directory for property_dir in property_directories: photo_files = list_files_in_directory(property_dir) + uprn = FOLDER_UPRN_LOOKUP[property_dir.name] # We now want to convert each file, and upload it to s3 for photo_filepath in photo_files: process_and_upload_images( - photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com" + uprn=uprn, + input_image_path=photo_filepath, + bucket_name=CDN_BUCKET_NAME, + distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME ) From 5d3440815d7616bf3af37ca68136a73d610f071a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 16:33:15 +0100 Subject: [PATCH 232/248] Pushing non-invasive photos to app wip --- .../photos/README.md | 0 .../photos/app.py | 28 +++++++++++-------- .../photos/requirements.txt | 0 3 files changed, 16 insertions(+), 12 deletions(-) rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/README.md (100%) rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/app.py (84%) rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/requirements.txt (100%) diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_intrusive_surveys/photos/README.md similarity index 100% rename from etl/non_invasive_surveys/photos/README.md rename to etl/non_intrusive_surveys/photos/README.md diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_intrusive_surveys/photos/app.py similarity index 84% rename from etl/non_invasive_surveys/photos/app.py rename to etl/non_intrusive_surveys/photos/app.py index ffd993a6..c531355b 100644 --- a/etl/non_invasive_surveys/photos/app.py +++ b/etl/non_intrusive_surveys/photos/app.py @@ -5,7 +5,7 @@ from pathlib import Path from dotenv import load_dotenv # Inputs -ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env" +ENV_FILEPATH = "etl/non_intrusive_surveys/photos/.env" PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data" FOLDER_UPRN_LOOKUP = { "91 Osprey Drive DY1 2JS": 90048026, @@ -55,25 +55,29 @@ def list_files_in_directory(directory_path, file_extension=".jpg"): def create_images(input_path, uprn): + # Define the base directory path + base_directory = f"non_intrusive_photos/{uprn}" + print(f"Creating directory: {base_directory}") # Debug: print the directory to be created + # Need to create local directory if it doesn't exist - os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True) + os.makedirs(base_directory, exist_ok=True) + + # Define output paths + thumbnail_path = os.path.join(base_directory, "thumbnail.jpg") + full_hd_path = os.path.join(base_directory, "1080p.jpg") + webp_path = os.path.join(base_directory, "webp.webp") # Save as WebP format # Load the image with Image.open(input_path) as img: - # Define output paths - thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg" - full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg" - webp_path = f"non_invasive_photos/{uprn}/webp.webp" # Save as WebP format - # Create a thumbnail thumbnail = img.copy() - thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS) # High-quality downsampling - thumbnail.save(thumbnail_path, 'JPEG', quality=85) # Save as JPEG with quality setting + thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS) + thumbnail.save(thumbnail_path, 'JPEG', quality=85) # Create a 1080p version full_hd = img.copy() full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS) - full_hd.save(full_hd_path, 'JPEG', quality=90) # Slightly higher quality for larger image + full_hd.save(full_hd_path, 'JPEG', quality=90) # Convert to WebP for better compression webp = img.copy() @@ -102,10 +106,10 @@ def generate_cdn_url(distribution_domain, object_name): def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain): # Create images - thumbnail, full_hd, original = create_images(str(uprn), input_image_path) + thumbnail, full_hd, original = create_images(input_image_path, uprn=str(uprn)) # Upload images - upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original]) + upload_photos_to_s3(bucket_name, photo_paths=[thumbnail, full_hd, original]) # Generate CDN links cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]] diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_intrusive_surveys/photos/requirements.txt similarity index 100% rename from etl/non_invasive_surveys/photos/requirements.txt rename to etl/non_intrusive_surveys/photos/requirements.txt From d6fa81939d6a0f7752728953250b3554995a5297 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 23:41:24 +0100 Subject: [PATCH 233/248] creating new aggregations for front end --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/Property.py | 8 +- .../app/db/functions/portfolio_functions.py | 3 +- backend/app/plan/router.py | 128 +++++++++++++++++- recommendations/Recommendations.py | 11 +- 6 files changed, 146 insertions(+), 8 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c75af922..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1f2c584d..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/Property.py b/backend/Property.py index a8ed9129..7b5a6bc3 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -142,6 +142,8 @@ class Property: self.current_adjusted_energy = None self.expected_adjusted_energy = None + self.current_energy_bill = None + self.expected_energy_bill = None self.recommendations_scoring_data = [] @@ -892,12 +894,16 @@ class Property: return component_data - def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy): + def set_adjusted_energy( + self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill + ): """ Stores these values for usage later """ self.current_adjusted_energy = current_adjusted_energy self.expected_adjusted_energy = expected_adjusted_energy + self.current_energy_bill = current_energy_bill + self.expected_energy_bill = expected_energy_bill def set_windows_count(self): """ diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index ead8280f..69203368 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -4,7 +4,7 @@ from backend.app.db.models.portfolio import Portfolio def aggregate_portfolio_recommendations( - session, portfolio_id: int, total_valuation_increase: float, labour_days: float + session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict ): # Aggregate multiple fields aggregates = ( @@ -27,6 +27,7 @@ def aggregate_portfolio_recommendations( "energy_savings": aggregates.energy_savings or 0, "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0, "energy_cost_savings": aggregates.energy_cost_savings or 0, + **aggregated_data } # Get the portfolio and update the fields diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 49e14872..b8b2d5c8 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1,3 +1,4 @@ +import json from datetime import datetime from tqdm import tqdm @@ -57,6 +58,109 @@ def patch_epc(patch, epc_records): return epc_records +def extract_portfolio_aggregation_data( + input_properties, total_valuation_increase, recommendations, new_epc_bands +): + # We aggregate a number of metrics for the portfolio: + # 1) A breakdown of the number of properties in each EPC band + # a) before retrofit + # b) after retrofit + # 2) Number of units + # 3) Co2/unit + # a) before retrofit + # b) after retrofit + # 4) Energy bulls/unit + # a) before retrofit + # b) after retrofit + # 5) Average valuation improvement/unit + # 6) Total cost + # 7) Cost per unit + # 8) £ per CO2 saved + # 9) £ per SAP point + + # We need to construct the underlyind data for this + + # Helper function to reformat the EPC data + def reformat_epc_data(epc_counts): + # Define all possible EPC bands in the required order + epc_bands = ["G", "F", "E", "D", "C", "B", "A"] + + # Create the formatted data list by checking each band in the order + formatted_data = [] + for band in epc_bands: + # Get the count from the dictionary, defaulting to 0 if not present + count = epc_counts.get(band, 0) + # Append the formatted dictionary to the list + formatted_data.append({"name": band, band: count}) + + return formatted_data + + n_units = len(input_properties) + + agg_data = [] + for p in input_properties: + # Get the recommendations for the property + property_recommendations = recommendations.get(p.id, []) + if not property_recommendations: + continue + # Get just the default recommendations + default_recommendations = [r for r in property_recommendations if r["default"]] + + # We can now calculate multiple outputs based on default recommendations + carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) + + pre_retrofit_co2 = p.data["co2-emissions-current"] + post_retrofit_co2 = pre_retrofit_co2 - carbon_savings + + pre_retrofit_energy_bill = p.current_energy_bill + post_retrofit_energy_bill = p.expected_energy_bill + + cost = sum([r["total"] for r in default_recommendations]) + sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) + + agg_data.append({ + "pre_retrofit_epc": p.data["current-energy-rating"], + "post_retrofit_epc": new_epc_bands[p.id], + "pre_retrofit_co2": pre_retrofit_co2, + "post_retrofit_co2": post_retrofit_co2, + "pre_retrofit_energy_bill": pre_retrofit_energy_bill, + "post_retrofit_energy_bill": post_retrofit_energy_bill, + "cost": cost, + "sap_point_improvement": sap_point_improvement + }) + + agg_data = pd.DataFrame(agg_data) + + n_units_to_retrofit = len(agg_data) + + valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit + + total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum() + total_sap_points = agg_data["sap_point_improvement"].sum() + + aggregation_data = { + "epc_breakdown_pre_retrofit": json.dumps( + reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict()) + ), + "epc_breakdown_post_retrofit": json.dumps( + reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict()) + ), + "number_of_properties": n_units, + "n_units_to_retrofit": n_units_to_retrofit, + "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(), + "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(), + "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(), + "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(), + "valuation_improvement_per_unit": valuation_improvment_per_unit, + "total_cost": agg_data["cost"].sum(), + "cost_per_unit": agg_data["cost"].mean(), + "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved, + "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points + } + + return aggregation_data + + router = APIRouter( prefix="/plan", tags=["plan"], @@ -243,7 +347,13 @@ async def trigger_plan(body: PlanTriggerRequest): property_instance = [p for p in input_properties if p.id == property_id][0] - recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = ( + ( + recommendations_with_impact, + current_adjusted_energy, + expected_adjusted_energy, + current_energy_bill, + expected_energy_bill + ) = ( Recommendations.calculate_recommendation_impact( property_instance=property_instance, all_predictions=all_predictions, @@ -254,7 +364,9 @@ async def trigger_plan(body: PlanTriggerRequest): # Store the resulting adjusted energy in the property instance property_instance.set_adjusted_energy( current_adjusted_energy=current_adjusted_energy, - expected_adjusted_energy=expected_adjusted_energy + expected_adjusted_energy=expected_adjusted_energy, + current_energy_bill=current_energy_bill, + expected_energy_bill=expected_energy_bill ) input_measures = prepare_input_measures(recommendations_with_impact, body.goal) @@ -316,6 +428,7 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Uploading recommendations to the database") property_valuation_increases = [] session.commit() + new_epc_bands = {} for i in range(0, len(input_properties), BATCH_SIZE): try: # Take a slice of the input_properties list to make a batch @@ -327,6 +440,7 @@ async def trigger_plan(body: PlanTriggerRequest): total_sap_points = sum([r["sap_points"] for r in default_recommendations]) new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points new_epc = sap_to_epc(new_sap_points) + new_epc_bands[p.id] = new_epc valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc) @@ -392,11 +506,19 @@ async def trigger_plan(body: PlanTriggerRequest): [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()] )) + aggregated_data = extract_portfolio_aggregation_data( + input_properties=input_properties, + total_valuation_increase=total_valuation_increase, + recommendations=recommendations, + new_epc_bands=new_epc_bands + ) + aggregate_portfolio_recommendations( session, portfolio_id=body.portfolio_id, total_valuation_increase=total_valuation_increase, - labour_days=labour_days + labour_days=labour_days, + aggregated_data=aggregated_data ) # Commit final changes diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 68fead16..659b41a8 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -281,6 +281,9 @@ class Recommendations: current_adjusted_energy - expected_adjusted_energy ) + current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy) + expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy) + for recommendations_by_type in property_recommendations: for rec in recommendations_by_type: @@ -355,4 +358,10 @@ class Recommendations: rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None): raise ValueError("sap points, co2 or heat demand is missing") - return property_recommendations, current_adjusted_energy, expected_adjusted_energy + return ( + property_recommendations, + current_adjusted_energy, + expected_adjusted_energy, + current_energy_bill, + expected_energy_bill + ) From cc6277c191dea07ce1a8a26b8083e1eebdd2887b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Apr 2024 23:52:10 +0100 Subject: [PATCH 234/248] extended outputs --- backend/app/plan/router.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index b8b2d5c8..f7a825db 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -115,6 +115,9 @@ def extract_portfolio_aggregation_data( pre_retrofit_energy_bill = p.current_energy_bill post_retrofit_energy_bill = p.expected_energy_bill + pre_retrofit_energy_consumption = p.current_adjusted_energy + post_retrofit_energy_consumption = p.expected_adjusted_energy + cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) @@ -125,6 +128,8 @@ def extract_portfolio_aggregation_data( "post_retrofit_co2": post_retrofit_co2, "pre_retrofit_energy_bill": pre_retrofit_energy_bill, "post_retrofit_energy_bill": post_retrofit_energy_bill, + "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption, + "post_retrofit_energy_consumption": post_retrofit_energy_consumption, "cost": cost, "sap_point_improvement": sap_point_improvement }) @@ -138,6 +143,9 @@ def extract_portfolio_aggregation_data( total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum() total_sap_points = agg_data["sap_point_improvement"].sum() + def format_money(amount): + return f"£{amount:,.0f}" + aggregation_data = { "epc_breakdown_pre_retrofit": json.dumps( reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict()) @@ -147,15 +155,18 @@ def extract_portfolio_aggregation_data( ), "number_of_properties": n_units, "n_units_to_retrofit": n_units_to_retrofit, - "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(), - "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(), - "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(), - "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(), - "valuation_improvement_per_unit": valuation_improvment_per_unit, - "total_cost": agg_data["cost"].sum(), - "cost_per_unit": agg_data["cost"].mean(), - "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved, - "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points + "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t", + "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t", + "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()), + "energy_bill_per_unit_post_retrofit": format_money(agg_data["post_retrofit_energy_bill"].mean()), + "energy_consumption_per_unit_pre_retrofit": str( + round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh", + "energy_consumption_per_unit_post_retrofit": str( + round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh", + "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit), + "cost_per_unit": format_money(agg_data["cost"].mean()), + "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved), + "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points) } return aggregation_data From 83d472a7108019fb7ea9f21c9196a5abba154ad0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Apr 2024 03:05:26 +0100 Subject: [PATCH 235/248] debugging --- backend/app/db/models/portfolio.py | 15 +++++++++++++++ backend/app/plan/router.py | 18 ++++++++++++------ recommendations/Recommendations.py | 3 +++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 830866e6..aa0146c0 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -45,6 +45,21 @@ class Portfolio(Base): labour_days = Column(Float) created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + # Aggregations for summary + epc_breakdown_pre_retrofit = Column(Text) + epc_breakdown_post_retrofit = Column(Text) + n_units_to_retrofit = Column(Integer) + co2_per_unit_pre_retrofit = Column(Text) + co2_per_unit_post_retrofit = Column(Text) + energy_bill_per_unit_pre_retrofit = Column(Text) + energy_bill_per_unit_post_retrofit = Column(Text) + energy_consumption_per_unit_pre_retrofit = Column(Text) + energy_consumption_per_unit_post_retrofit = Column(Text) + valuation_improvement_per_unit = Column(Text) + cost_per_unit = Column(Text) + cost_per_co2_saved = Column(Text) + cost_per_sap_point = Column(Text) + valuation_return_on_investment = Column(Text) class PropertyCreationStatus(enum.Enum): diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index f7a825db..661858b7 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -99,10 +99,9 @@ def extract_portfolio_aggregation_data( agg_data = [] for p in input_properties: - # Get the recommendations for the property + # Get the recommendations for the property - we include all properties, even ones without recommendations property_recommendations = recommendations.get(p.id, []) - if not property_recommendations: - continue + # Get just the default recommendations default_recommendations = [r for r in property_recommendations if r["default"]] @@ -113,11 +112,16 @@ def extract_portfolio_aggregation_data( post_retrofit_co2 = pre_retrofit_co2 - carbon_savings pre_retrofit_energy_bill = p.current_energy_bill - post_retrofit_energy_bill = p.expected_energy_bill + post_retrofit_energy_bill = p.current_energy_bill - sum( + [r["energy_cost_savings"] for r in default_recommendations] + ) pre_retrofit_energy_consumption = p.current_adjusted_energy - post_retrofit_energy_consumption = p.expected_adjusted_energy + post_retrofit_energy_consumption = p.current_adjusted_energy - sum( + [r["adjusted_heat_demand"] for r in default_recommendations] + ) + # Add up energy savings cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) @@ -166,7 +170,9 @@ def extract_portfolio_aggregation_data( "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit), "cost_per_unit": format_money(agg_data["cost"].mean()), "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved), - "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points) + "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points), + "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) + # TODO: Could we add 10yr carbon credits value? } return aggregation_data diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 659b41a8..e626ecfa 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -272,6 +272,8 @@ class Recommendations: current_epc_rating=property_instance.data["current-energy-rating"], ) + # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are + # actually implemented expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( epc_energy_consumption=expected_heat_demand, current_epc_rating=property_instance.data["current-energy-rating"], @@ -281,6 +283,7 @@ class Recommendations: current_adjusted_energy - expected_adjusted_energy ) + # TODO: We should determine if the home is gas & electricity or just electricity current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy) expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy) From 0f7e815379eacb6d76100a25186cd38e23d9b8c3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Apr 2024 11:18:36 +0100 Subject: [PATCH 236/248] updating text for valuation improvement --- backend/app/plan/router.py | 49 +++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 661858b7..45d87dd3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -59,7 +59,7 @@ def patch_epc(patch, epc_records): def extract_portfolio_aggregation_data( - input_properties, total_valuation_increase, recommendations, new_epc_bands + input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges ): # We aggregate a number of metrics for the portfolio: # 1) A breakdown of the number of properties in each EPC band @@ -69,7 +69,7 @@ def extract_portfolio_aggregation_data( # 3) Co2/unit # a) before retrofit # b) after retrofit - # 4) Energy bulls/unit + # 4) Energy bill/unit # a) before retrofit # b) after retrofit # 5) Average valuation improvement/unit @@ -105,6 +105,8 @@ def extract_portfolio_aggregation_data( # Get just the default recommendations default_recommendations = [r for r in property_recommendations if r["default"]] + has_recommendations = len(default_recommendations) > 0 + # We can now calculate multiple outputs based on default recommendations carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) @@ -125,6 +127,15 @@ def extract_portfolio_aggregation_data( cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) + lower_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["lower_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + upper_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["upper_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + agg_data.append({ "pre_retrofit_epc": p.data["current-energy-rating"], "post_retrofit_epc": new_epc_bands[p.id], @@ -135,14 +146,22 @@ def extract_portfolio_aggregation_data( "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption, "post_retrofit_energy_consumption": post_retrofit_energy_consumption, "cost": cost, - "sap_point_improvement": sap_point_improvement + "sap_point_improvement": sap_point_improvement, + "lower_bound_valuation_uplift": lower_bound_valuation_uplift, + "upper_bound_valuation_uplift": upper_bound_valuation_uplift, + "has_recommendations": has_recommendations }) agg_data = pd.DataFrame(agg_data) - n_units_to_retrofit = len(agg_data) + n_units_to_retrofit = agg_data["has_recommendations"].sum() - valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit + valuation_improvement_lower_bound_per_unit = ( + agg_data["lower_bound_valuation_uplift"].mean() + ) + valuation_improvement_upper_bound_per_unit = ( + agg_data["upper_bound_valuation_uplift"].mean() + ) total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum() total_sap_points = agg_data["sap_point_improvement"].sum() @@ -150,6 +169,17 @@ def extract_portfolio_aggregation_data( def format_money(amount): return f"£{amount:,.0f}" + valuation_improvment_per_unit = format_money( + total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - " + f"{format_money(valuation_improvement_upper_bound_per_unit)})") + + valuation_return_on_investment = ( + str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) + + f" (" + f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - " + f"{agg_data['upper_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f})" + ) + aggregation_data = { "epc_breakdown_pre_retrofit": json.dumps( reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict()) @@ -167,11 +197,11 @@ def extract_portfolio_aggregation_data( round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh", "energy_consumption_per_unit_post_retrofit": str( round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh", - "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit), + "valuation_improvement_per_unit": valuation_improvment_per_unit, "cost_per_unit": format_money(agg_data["cost"].mean()), "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved), "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points), - "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) + "valuation_return_on_investment": valuation_return_on_investment, # TODO: Could we add 10yr carbon credits value? } @@ -446,6 +476,7 @@ async def trigger_plan(body: PlanTriggerRequest): property_valuation_increases = [] session.commit() new_epc_bands = {} + property_value_increase_ranges = {} for i in range(0, len(input_properties), BATCH_SIZE): try: # Take a slice of the input_properties list to make a batch @@ -460,6 +491,7 @@ async def trigger_plan(body: PlanTriggerRequest): new_epc_bands[p.id] = new_epc valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc) + property_value_increase_ranges[p.id] = valuations # Your existing operations property_details_epc = p.get_property_details_epc( @@ -527,7 +559,8 @@ async def trigger_plan(body: PlanTriggerRequest): input_properties=input_properties, total_valuation_increase=total_valuation_increase, recommendations=recommendations, - new_epc_bands=new_epc_bands + new_epc_bands=new_epc_bands, + property_value_increase_ranges=property_value_increase_ranges ) aggregate_portfolio_recommendations( From 02399667798370cab35608dc5edac17db7de1960 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Apr 2024 11:32:15 +0100 Subject: [PATCH 237/248] setting up non-invasive recommendations --- etl/customers/immo/pilot/asset_list.py | 29 +++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index e587cc25..614fa8a0 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -21,6 +21,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands) # This is information we need to override on the EPC itself, for instance if a new survey has been conducted and # that has not reached the API +# For 53 Bromley, the non-invasives found the walls to be partially filled patches = [ { 'address': '6 Beech Road', 'postcode': 'DY1 4BP', @@ -42,7 +43,11 @@ patches = [ 'energy-consumption-current': '491', 'co2-emissions-current': '5.0', 'potential-energy-efficiency': '87' - } + }, + { + 'address': '53 Bromley', 'postcode': 'DY5 4PJ', + 'walls-description': 'Cavity wall, partial insulation', + }, ] # This is information that is found as a result of the non-invasives, that mean that certain measures @@ -56,6 +61,19 @@ already_installed = [ } ] +non_invasive_recommendations = [ + {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'recommendations': []}, + {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'recommendations': ['cavity_extract_and_refill']}, + {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'recommendations': ['cavity_extract_and_refill']}, + {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'recommendations': ['cavity_extract_and_refill']}, + {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'recommendations': ['cavity_surveyed_as_filled_is_partial']}, + {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'recommendations': ['cavity_extract_and_refill']}, + {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'recommendations': ['cavity_extract_and_refill']}, + {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'recommendations': ['cavity_extract_and_refill']}, + {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'recommendations': []}, + {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'recommendations': ['cavity_extract_and_refill']}, +] + def app(): raw_asset_list = read_excel_from_s3( @@ -102,6 +120,14 @@ def app(): file_name=patches_filename ) + # Store non-invasive recommendations in S3 + non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json" + save_csv_to_s3( + dataframe=pd.DataFrame(non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename + ) + # EPC C portoflio body = { "portfolio_id": str(PORTFOLIO_ID), @@ -111,6 +137,7 @@ def app(): "trigger_file_path": filename, "already_installed_file_path": already_installed_filename, "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "budget": None, } print(body) From b3e7675488b7004cc98f171b8d78793188345148 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Apr 2024 11:38:58 +0100 Subject: [PATCH 238/248] added non-invasive recommendations to property class --- backend/Property.py | 7 ++++++- backend/app/plan/router.py | 13 +++++++++++++ backend/app/plan/schemas.py | 1 + etl/customers/immo/pilot/asset_list.py | 1 + 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/backend/Property.py b/backend/Property.py index 7b5a6bc3..2d1dbd5d 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -61,7 +61,8 @@ class Property: n_bedrooms = None def __init__( - self, id, postcode, address, epc_record, already_installed=None, **kwargs + self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None, + **kwargs ): self.epc_record = epc_record @@ -80,6 +81,10 @@ class Property: # cost and instead, provide a message that the measure has already been installed self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] + self.non_invasive_recommendations = ( + ast.literal_eval(property_non_invasive_recommendations['recommendations']) if + property_non_invasive_recommendations else [] + ) self.uprn = epc_record.get("uprn") self.full_sap_epc = epc_record.get("full_sap_epc") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 45d87dd3..e5a2aa79 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -242,6 +242,12 @@ async def trigger_plan(body: PlanTriggerRequest): bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path ) + non_invasive_recommendations = [] + if body.non_invasive_recommendations_file_path: + non_invasive_recommendations = read_csv_from_s3( + bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path + ) + cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) @@ -297,6 +303,12 @@ async def trigger_plan(body: PlanTriggerRequest): x for x in already_installed if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) ), {}) + + property_non_invasive_recommendations = next(( + x for x in non_invasive_recommendations if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + input_properties.append( Property( id=property_id, @@ -304,6 +316,7 @@ async def trigger_plan(body: PlanTriggerRequest): postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, already_installed=property_already_installed, + non_invasive_recommendations=property_non_invasive_recommendations, **Property.extract_kwargs(config) ) ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 76eb49d2..59c0ebef 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -11,6 +11,7 @@ class PlanTriggerRequest(BaseModel): trigger_file_path: str already_installed_file_path: Optional[str] = None patches_file_path: Optional[str] = None + non_invasive_recommendations_file_path: Optional[str] = None exclusions: Optional[conlist(str, min_items=1)] = None # Pre-defined list of possibilities for exclusions diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 614fa8a0..57fa5957 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -151,6 +151,7 @@ def app(): "trigger_file_path": filename, "already_installed_file_path": already_installed_filename, "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "budget": None, } print(body) From 0c1fb0360fa1473d4123e3a41c3a82f65d9a3512 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Apr 2024 11:50:02 +0100 Subject: [PATCH 239/248] fixed patching of partial cwi description --- backend/app/plan/router.py | 2 ++ etl/customers/immo/pilot/asset_list.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e5a2aa79..7200d2ef 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -52,6 +52,8 @@ def patch_epc(patch, epc_records): """ for patch_variable, patch_value in patch.items(): + if patch_value == "": + continue if patch_variable in epc_records["original_epc"]: epc_records["original_epc"][patch_variable] = patch_value diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py index 57fa5957..6329a2be 100644 --- a/etl/customers/immo/pilot/asset_list.py +++ b/etl/customers/immo/pilot/asset_list.py @@ -46,7 +46,7 @@ patches = [ }, { 'address': '53 Bromley', 'postcode': 'DY5 4PJ', - 'walls-description': 'Cavity wall, partial insulation', + 'walls-description': 'Cavity wall, partial insulation (assumed)', }, ] From 4cf4d67ac91610d19e418aa33ae794a37c1be505 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Apr 2024 13:21:14 +0100 Subject: [PATCH 240/248] Added cavity extraction and re-fill recommendation and costing --- backend/Property.py | 17 ++++++++++++++--- backend/app/plan/router.py | 14 ++++++++------ recommendations/Costs.py | 13 ++++++++++++- recommendations/Recommendations.py | 19 ++++++++++++++----- recommendations/WallRecommendations.py | 17 +++++++++++++++-- 5 files changed, 63 insertions(+), 17 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 2d1dbd5d..2e6cbbb6 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -61,7 +61,7 @@ class Property: n_bedrooms = None def __init__( - self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None, + self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None, **kwargs ): @@ -82,8 +82,8 @@ class Property: self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] self.non_invasive_recommendations = ( - ast.literal_eval(property_non_invasive_recommendations['recommendations']) if - property_non_invasive_recommendations else [] + ast.literal_eval(non_invasive_recommendations['recommendations']) if + non_invasive_recommendations else [] ) self.uprn = epc_record.get("uprn") @@ -284,6 +284,7 @@ class Property: recommendation_record=recommendation_record, recommendations=previous_phase_representatives + [rec], primary_recommendation_id=rec["recommendation_id"], + non_invasive_recommendations=self.non_invasive_recommendations, ) self.recommendations_scoring_data.append(scoring_dict) @@ -293,6 +294,7 @@ class Property: recommendation_record, recommendations: list, primary_recommendation_id: int, + non_invasive_recommendations: list = None, ): """ This function will iterate through a list of recommendations and apply a simulation for each recommendation @@ -301,10 +303,12 @@ class Property: :param recommendation_record: The record of the property, which will be updated :param recommendations: The list of recommendations to apply :param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record + :param non_invasive_recommendations: The list of non-invasive recommendations :return: The updated recommendation record """ output = recommendation_record.copy() + non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations for col in [ "walls_insulation_thickness", @@ -323,6 +327,13 @@ class Property: "external_wall_insulation", "cavity_wall_insulation", ]: + + # # If we have a non-incasive recommendation that the cavity wall is partially filled, we skip the + # # cavity wall insulation recommendation (since on the EPC, the property will look like how it did + # # before any works) + # if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations: + # continue + # The upgrade made here is to the u-value of the walls and the description of the # insulation thickness output["walls_thermal_transmittance_ending"] = recommendation[ diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 7200d2ef..9854abe8 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -171,11 +171,13 @@ def extract_portfolio_aggregation_data( def format_money(amount): return f"£{amount:,.0f}" - valuation_improvment_per_unit = format_money( - total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - " - f"{format_money(valuation_improvement_upper_bound_per_unit)})") + valuation_improvment_per_unit = str( + format_money( + total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - " + f"{format_money(valuation_improvement_upper_bound_per_unit)})") + ) - valuation_return_on_investment = ( + valuation_return_on_investment = str( str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) + f" (" f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - " @@ -189,8 +191,8 @@ def extract_portfolio_aggregation_data( "epc_breakdown_post_retrofit": json.dumps( reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict()) ), - "number_of_properties": n_units, - "n_units_to_retrofit": n_units_to_retrofit, + "number_of_properties": int(n_units), + "n_units_to_retrofit": int(n_units_to_retrofit), "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t", "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t", "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()), diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 0e67b352..852bb11f 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -91,6 +91,10 @@ DOUBLE_RADIATOR_COST = 300 FLUE_COST = 600 PIPEWORK_COST = 750 # Min cost is £500 +# This is the cost per meter squared for cavity extraction +# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/ +CAVITY_EXTRACTION_COST = 21.5 + class Costs: """ @@ -173,7 +177,7 @@ class Costs: if not self.labour_adjustment_factor: raise ValueError("Labour adjustment factor not found") - def cavity_wall_insulation(self, wall_area, material): + def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False): """ Calculates the total cost for cavity wall insulation based on material and labor costs, including contingency, preliminaries, profit, and VAT. @@ -208,6 +212,13 @@ class Costs: # Assume a team of 2 labour_days = (labour_hours / 8) / 2 + if is_extraction_and_refill: + # bump up the cost of the work + total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area + # Additional 2 days work + labour_hours = labour_hours + (2 * 8) + labour_days = labour_days + 2 + return { "total": total_cost, "subtotal": subtotal_before_vat, diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index e626ecfa..5960d7be 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -149,12 +149,14 @@ class Recommendations: property_recommendations = self.insert_temp_recommendation_id(property_recommendations) # We also need to create the representative recommendations for each recommendation type - property_representative_recommendations = self.create_representative_recommendations(property_recommendations) + property_representative_recommendations = self.create_representative_recommendations( + property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations + ) return property_recommendations, property_representative_recommendations @staticmethod - def create_representative_recommendations(property_recommendations): + def create_representative_recommendations(property_recommendations, non_invasive_recommendations): """ This method will create a representative recommendation for each recommendation type In order to create a representative recommendation, we choose the recommendation that has: @@ -169,6 +171,13 @@ class Recommendations: for recommendations_by_type in property_recommendations: + # If the property was initially surveyed as filled, but the cavity was only partially filled, we don't + # want to include the cavity wall insulation recommendation in the defaults + # if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and ( + # "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations + # ): + # continue + if recommendations_by_type[0].get("type") == "mechanical_ventilation": continue @@ -238,13 +247,13 @@ class Recommendations: property_sap_predictions = all_predictions["sap_change_predictions"][ all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id) - ] + ].copy() property_heat_predictions = all_predictions["heat_demand_predictions"][ all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id) - ] + ].copy() property_carbon_predictions = all_predictions["carbon_change_predictions"][ all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id) - ] + ].copy() property_recommendations = recommendations[property_instance.id].copy() diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index feb2620b..20fc453c 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -113,7 +113,9 @@ class WallRecommendations(Definitions): insulation_thickness = self.property.walls["insulation_thickness"] # We check if the wall is already insulated and if so, we exit - if (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]: + if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and ( + "cavity_extract_and_refill" not in self.property.non_invasive_recommendations + ): return if u_value: @@ -216,15 +218,26 @@ class WallRecommendations(Definitions): if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE: lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value) + is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations + cost_result = self.costs.cavity_wall_insulation( wall_area=self.property.insulation_wall_area, material=material.to_dict(), + is_extraction_and_refill=is_extraction_and_refill ) already_installed = "cavity_wall_insulation" in self.property.already_installed if already_installed: cost_result = override_costs(cost_result) + if is_extraction_and_refill: + description = f"Extract and refill cavity wall insulation with {material['description']}" + else: + description = self._make_description(material) + + # updated the new u-value with the best possible our installers have + new_u_value = max(0.31, new_u_value) + recommendations.append( { "phase": phase, @@ -237,7 +250,7 @@ class WallRecommendations(Definitions): ) ], "type": "cavity_wall_insulation", - "description": self._make_description(material), + "description": description, "starting_u_value": u_value, "new_u_value": new_u_value, "sap_points": None, From e000c87cad98963e8c734a5cf8990a5a7b713217 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Apr 2024 12:16:13 +0100 Subject: [PATCH 241/248] added patches for immo pilot 2 --- etl/customers/immo/pilot/asset_list_2.py | 126 +++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 etl/customers/immo/pilot/asset_list_2.py diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py new file mode 100644 index 00000000..f722a490 --- /dev/null +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -0,0 +1,126 @@ +import pandas as pd +from utils.s3 import read_excel_from_s3 +from utils.s3 import save_csv_to_s3 + +USER_ID = 8 +PORTFOLIO_ID = 72 + +# For +patches = [ + { + 'address': '116 Parkes Hall Road', + 'postcode': 'DY1 3RJ', + 'walls-description': 'Cavity wall, filled cavity', + 'walls-energy-eff': 'Average', + 'roof-description': 'Pitched, 270 mm loft insulation', + 'roof-energy-eff': 'Good', + 'windows-description': 'Fully double glazed', + 'windows-energy-eff': 'Good', + 'mainheat-description': 'Boiler and radiators, mains gas', + 'mainheat-energy-eff': 'Good', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', + 'mainheatc-energy-eff': 'Good', + 'lighting-description': 'Low energy lighting in 27% of fixed outlets', + 'lighting-energy-eff': 'Good', + 'floor-description': 'Solid, no insulation (assumed)', + 'secondheat-description': 'None', + 'current-energy-efficiency': '73', + 'current-energy-rating': 'C', + 'energy-consumption-current': '184', + 'co2-emissions-current': '2.4', + 'potential-energy-efficiency': '88', + 'total-floor-area': '73', + 'construction-age-band': 'England and Wales: 1930-1949', + 'property-type': 'House', + 'built-form': 'Mid-Terrace', + } +] + +# This is information that is found as a result of the non-invasives, that mean that certain measures +# have been installed already. To reflect this in the front end, it is included in the recommendation, however +# the cost is removed and instead, a message is presented saying that the measure is already installed. +already_installed = [] + +non_invasive_recommendations = [] + + +def app(): + raw_asset_list = read_excel_from_s3( + bucket_name="retrofit-datalake-dev", + file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx", + header_row=0 + ) + + raw_asset_list = raw_asset_list[raw_asset_list["in_pilot"]].copy() + + # Extract address and postcode + raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0] + raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() + + # We're provided with number of bathrooms and number of bedrooms. + asset_list = raw_asset_list.rename( + columns={ + "No. of Beds": "n_bedrooms", + "No. of WC's": "n_bathrooms" + } + ) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store overrides in s3 + already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json" + save_csv_to_s3( + dataframe=pd.DataFrame(already_installed), + bucket_name="retrofit-plan-inputs-dev", + file_name=already_installed_filename + ) + + # Store patches in s3 + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json" + save_csv_to_s3( + dataframe=pd.DataFrame(patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + + # Store non-invasive recommendations in S3 + non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json" + save_csv_to_s3( + dataframe=pd.DataFrame(non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename + ) + + # EPC C portoflio + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "C", + "trigger_file_path": filename, + "already_installed_file_path": already_installed_filename, + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "budget": None, + } + print(body) + + # EPC B portoflio + body = { + "portfolio_id": str(PORTFOLIO_ID + 1), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": already_installed_filename, + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "budget": None, + } + print(body) From acada27061d09f47ac76ecd2785c95eb39e741d3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Apr 2024 15:16:46 +0100 Subject: [PATCH 242/248] rounding up roof coverage % --- backend/SearchEpc.py | 9 +++++++-- backend/app/plan/router.py | 11 +++++++++-- backend/ml_models/Valuation.py | 8 ++++++++ etl/customers/immo/pilot/asset_list_2.py | 21 ++++++++++++++++++--- etl/epc/Record.py | 2 +- recommendations/SolarPvRecommendations.py | 10 +++++++--- 6 files changed, 50 insertions(+), 11 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index cc2ee4a9..44178792 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -709,8 +709,13 @@ class SearchEpc: self.full_sap_epc = {} # Finally, set a standardised address 1 and postcode - self.address_clean = self.ordnance_survey_client.address_os - self.postcode_clean = self.ordnance_survey_client.postcode_os + self.address_clean = ( + self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1 + ) + self.postcode_clean = ( + self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else + self.postcode + ) return os_response = self.ordnance_survey_client.get_places_api() diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 9854abe8..a8464ee6 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -52,6 +52,10 @@ def patch_epc(patch, epc_records): """ for patch_variable, patch_value in patch.items(): + + if patch_variable in ["address", "postcode"]: + continue + if patch_value == "": continue if patch_variable in epc_records["original_epc"]: @@ -268,9 +272,12 @@ async def trigger_plan(body: PlanTriggerRequest): postcode=config["postcode"], uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, - os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY + os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY, ) - epc_searcher.find_property() + epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) + epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) + # For the moment, our OS API access is unavailable, so we skip and interpolate + epc_searcher.find_property(skip_os=True) # Create a record in db property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 251c016a..39ea5a98 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -63,6 +63,14 @@ class PropertyValuation: 90093693: 279_000, # Based on Zoopla 90055152: 149_000, # Based on Zoopla 90028499: 238_000, # Based on Zoopla + # IMMO Dudley Pilot 2- search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ + 90039318: 177_000, # Based on Zoopla + 90038384: 170_000, # Based on Zoopla + 90105380: 185_000, # Based on Zoopla + 90124001: 165_000, # Based on Zoopla + 90013980: 148_000, # Based on Zoopla + 90087154: 184_000, # Based on Zoopla + 90046817: 167_000, # Based on Zoopla } # We base our valuation uplifts on a number of sources diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py index f722a490..121e7a81 100644 --- a/etl/customers/immo/pilot/asset_list_2.py +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -10,6 +10,7 @@ patches = [ { 'address': '116 Parkes Hall Road', 'postcode': 'DY1 3RJ', + 'uprn': '90046817', 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Average', 'roof-description': 'Pitched, 270 mm loft insulation', @@ -21,7 +22,7 @@ patches = [ 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'mainheatc-energy-eff': 'Good', 'lighting-description': 'Low energy lighting in 27% of fixed outlets', - 'lighting-energy-eff': 'Good', + 'lighting-energy-eff': 'Average', 'floor-description': 'Solid, no insulation (assumed)', 'secondheat-description': 'None', 'current-energy-efficiency': '73', @@ -39,7 +40,11 @@ patches = [ # This is information that is found as a result of the non-invasives, that mean that certain measures # have been installed already. To reflect this in the front end, it is included in the recommendation, however # the cost is removed and instead, a message is presented saying that the measure is already installed. -already_installed = [] +already_installed = [ + { + 'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"] + } +] non_invasive_recommendations = [] @@ -58,13 +63,23 @@ def app(): raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() # We're provided with number of bathrooms and number of bedrooms. + # THe UPRNs are not the official ones asset_list = raw_asset_list.rename( columns={ "No. of Beds": "n_bedrooms", - "No. of WC's": "n_bathrooms" + "No. of WC's": "n_bathrooms", + 'Property Type': 'property_type', + 'Architype': 'built_form' } ) + # Remap the values + asset_list["built_form"] = asset_list["built_form"].map({ + "SEMI DETACHED": "Semi-Detached", + "MID TERRACE": "Mid-Terrace", + "END TERRACE": "End-Terrace", + }) + # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" save_csv_to_s3( diff --git a/etl/epc/Record.py b/etl/epc/Record.py index e74330a2..9a965c6a 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -191,7 +191,7 @@ class EPCRecord: This method will clean the records using the data processor """ epc_data_processor = EPCDataProcessor( - data=self.epc_record_as_dataframe("prepared_epc"), + data=self.epc_record_as_dataframe("prepared_epc").copy(), run_mode="newdata", cleaning_averages=self.cleaning_data, ) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 58cf9735..b44557ab 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -56,14 +56,18 @@ class SolarPvRecommendations: if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv: return + solar_pv_percentage = self.property.solar_pv_percentage + # We round up to the neaest 10% + solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10 + # For the solar recommendations, we produce the following scenarios: # 1) Solar panels only, we present a high, medium and low coverage # 2) With and without battery roof_coverage_scenarios = [ - self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage, + solar_pv_percentage - 0.1, solar_pv_percentage, ] - if self.property.solar_pv_percentage <= 0.4: - roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1) + if solar_pv_percentage <= 0.4: + roof_coverage_scenarios.append(solar_pv_percentage + 0.1) # We make sure we haven't gone too low or high - we allow no more than 60% coverage roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6] # If we only have two scenarios, we add a coverage scenario 10% less than the smallest From db2586061598471f182fc338668618dfd4109a61 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Apr 2024 16:01:41 +0100 Subject: [PATCH 243/248] Completed pilot 2 --- etl/customers/immo/pilot/asset_list_2.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py index 121e7a81..1b4fad9a 100644 --- a/etl/customers/immo/pilot/asset_list_2.py +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -43,6 +43,15 @@ patches = [ already_installed = [ { 'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"] + }, + { + 'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"] + }, + { + 'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"] + }, + { + 'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"] } ] From 3593b7ae9ebd4245985a2dabc80446b23f00d84e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 13:54:04 +0100 Subject: [PATCH 244/248] Added boiler upgrade recommendation --- etl/customers/gla_croydon_demo/asset_list.py | 5 ++-- recommendations/Costs.py | 12 ++------ recommendations/HeatingRecommender.py | 31 +++++++++----------- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 7dde8926..1655979b 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -34,8 +34,9 @@ def app(): low_memory=False ) - z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count") - z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] + z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] + z["HOTWATER_DESCRIPTION"].value_counts() + z["MAIN_FUEL"].value_counts() # Filter on entries where we have a UPRN epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 852bb11f..d7a8ad2f 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -67,18 +67,12 @@ LOW_CARBON_COMBI_BOILER = 2200 # https://www.greenmatch.co.uk/boilers/35kw-boiler # https://www.greenmatch.co.uk/boilers/40kw-boiler # These are exclusive of installation costs -COMBI_BOILER_COSTS = { +CONDENSING_BOILER_COSTS = { "30kw": 1550, "35kw": 1610, "40kw": 1625 } -CONVENTIONAL_BOILER_COSTS = { - "30kw": 1117, - "35kw": 1546, - "40kw": 1776 -} - # Assumes 3 hours to remove each heater (including re-decorating) ROOM_HEATER_REMOVAL_COST = 120 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3 @@ -1179,7 +1173,7 @@ class Costs: estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators) return round(estimated_radiators) - def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms): + def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms): """ Based on a basic estimate of median value £2600 to install a low carbon combi boiler First time central heating vosts can als be found here: @@ -1187,7 +1181,7 @@ class Costs: :return: """ - unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size] + unit_cost = CONDENSING_BOILER_COSTS[size] # The unit cost is the cost without VAT # We now need to estimate the cost of the works labour_days = 2 diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 432dc6a6..2423901a 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -312,7 +312,15 @@ class HeatingRecommender: simulation_config = {} boiler_costs = {} boiler_recommendation = {} - if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]: + + has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"] + + has_inefficient_mains_water = ( + self.property.hotwater["clean_description"] in ["From main system"] and + self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"] + ) + + if has_inefficient_space_heating or has_inefficient_mains_water: boiler_size = self.estimate_boiler_size( property_type=self.property.data["property-type"], built_form=self.property.data["built-form"], @@ -321,22 +329,12 @@ class HeatingRecommender: num_heated_rooms=self.property.data["number-heated-rooms"], ) - # We recommend a combi boiler under the following conditions - # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be - # heated if there is no existing heating system). - # 2) There 1 or fewer bathrooms - # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple - # bathrooms - is_combi = ( - (self.property.number_of_rooms <= 4) and - (self.property.n_bathrooms in [None, 0, 1]) - ) - if is_combi: - description = "Upgrade to a new combi boiler" - else: - description = "Upgrade to a new gas condensing boiler" + description = "Upgrade to a new condensing boiler" - simulation_config = {"mainheat_energy_eff_ending": "Good"} + simulation_config = { + "mainheat_energy_eff_ending": "Good", + "hot_water_energy_eff_ending": "Good" + } if system_change: # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation @@ -363,7 +361,6 @@ class HeatingRecommender: } boiler_costs = self.costs.boiler( - is_combi=is_combi, size=f"{boiler_size}kw", exising_room_heaters=exising_room_heaters, system_change=system_change, From 391cb356ee12270aa9f5a4ffeff6a917f07ff05e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 14:07:47 +0100 Subject: [PATCH 245/248] debugging recommendation when we have independent boiler upgrade and heating controls --- recommendations/HeatingRecommender.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 2423901a..aa5cabdb 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -394,9 +394,13 @@ class HeatingRecommender: controls_recommender.recommend(heating_description="Boiler and radiators, mains gas") # We may have 2 recommendations from the heating controls - if not controls_recommender.recommendation: + if not controls_recommender.recommendation and not boiler_recommendation: return + if not system_change and len(boiler_recommendation): + # If there is not a system change, we add the boiler recommendation at point. + self.recommendations.append(boiler_recommendation) + if system_change: # We combine the heating and controls recommendations, in the case of a system change combined_recommendations = [] From 8bd899bcba8739b3232ec254fa799ff8497efb0f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 16:43:13 +0100 Subject: [PATCH 246/248] debugging structure of heating recommendations --- backend/app/plan/router.py | 1 + recommendations/HeatingRecommender.py | 8 ++++---- recommendations/Recommendations.py | 9 +++++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index a8464ee6..06d1aadf 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -380,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Preparing data for scoring in sap change api") recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + recommendations_scoring_data = recommendations_scoring_data.drop( columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", "carbon_ending"] diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index aa5cabdb..fe5cdd46 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -399,7 +399,7 @@ class HeatingRecommender: if not system_change and len(boiler_recommendation): # If there is not a system change, we add the boiler recommendation at point. - self.recommendations.append(boiler_recommendation) + self.recommendations.append([boiler_recommendation]) if system_change: # We combine the heating and controls recommendations, in the case of a system change @@ -417,12 +417,12 @@ class HeatingRecommender: combined_recommendations.extend(combined_recommendation) # Overwrite the existing boiler recommendation - self.recommendations.extend(combined_recommendations) + self.recommendations.append(combined_recommendations) else: # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade # but we'll only upgrade if we have a heating recommendation has_heating_recommendation = any( - recommendation["type"] == "heating" for recommendation in self.recommendations + rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation ) if has_heating_recommendation: recommendation_phase += 1 @@ -431,6 +431,6 @@ class HeatingRecommender: for recommendation in controls_recommender.recommendation: recommendation["phase"] = recommendation_phase - self.recommendations.extend(controls_recommender.recommendation) + self.recommendations.append(controls_recommender.recommendation) return diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 5960d7be..aba75ad9 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -111,11 +111,16 @@ class Recommendations: if "heating" not in self.exclusions: self.heating_recommender.recommend(phase=phase) if self.heating_recommender.recommendations: - property_recommendations.append(self.heating_recommender.recommendations) + if len(self.heating_recommender.recommendations) == 1: + property_recommendations.append(self.heating_recommender.recommendations) + else: + property_recommendations.extend(self.heating_recommender.recommendations) # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating controls) # otherwise we incremenet by 1 - max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations]) + max_used_phase = max( + [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs] + ) amount_to_increment = max_used_phase - phase + 1 phase += amount_to_increment From 7bdf2147badefd9f43250ac0eedc933f6378b842 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 18:38:16 +0100 Subject: [PATCH 247/248] restructured output of heating and heating control recommendations --- backend/app/plan/router.py | 20 ++++++++++---------- recommendations/HeatingRecommender.py | 16 +++++++++------- recommendations/Recommendations.py | 19 +++++++++++++------ 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 06d1aadf..ebaf482d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - if not is_new: - continue - - create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + # if not is_new: + # continue + # + # create_property_targets( + # session, + # property_id=property_id, + # portfolio_id=body.portfolio_id, + # epc_target=body.goal_value, + # heat_demand_target=None + # ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index fe5cdd46..537125a1 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -15,7 +15,8 @@ class HeatingRecommender: self.property = property_instance self.costs = Costs(self.property) - self.recommendations = [] + self.heating_recommendations = [] + self.heating_control_recommendations = [] def recommend(self, phase=0): @@ -23,7 +24,8 @@ class HeatingRecommender: # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this # in the Costs class, stored as SYSTEM_FLUSH_COST - self.recommendations = [] + self.heating_recommendations = [] + self.heating_control_recommendations = [] # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system @@ -254,7 +256,7 @@ class HeatingRecommender: system_change=system_change ) - self.recommendations.extend(recommendations) + self.heating_recommendations.extend(recommendations) @staticmethod def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms): @@ -399,7 +401,7 @@ class HeatingRecommender: if not system_change and len(boiler_recommendation): # If there is not a system change, we add the boiler recommendation at point. - self.recommendations.append([boiler_recommendation]) + self.heating_recommendations.extend([boiler_recommendation]) if system_change: # We combine the heating and controls recommendations, in the case of a system change @@ -417,12 +419,12 @@ class HeatingRecommender: combined_recommendations.extend(combined_recommendation) # Overwrite the existing boiler recommendation - self.recommendations.append(combined_recommendations) + self.heating_recommendations.extend(combined_recommendations) else: # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade # but we'll only upgrade if we have a heating recommendation has_heating_recommendation = any( - rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation + rec["type"] == "heating" for rec in self.heating_recommendations ) if has_heating_recommendation: recommendation_phase += 1 @@ -431,6 +433,6 @@ class HeatingRecommender: for recommendation in controls_recommender.recommendation: recommendation["phase"] = recommendation_phase - self.recommendations.append(controls_recommender.recommendation) + self.heating_control_recommendations.extend(controls_recommender.recommendation) return diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index aba75ad9..06dc2d61 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -110,16 +110,23 @@ class Recommendations: # Heating and Electical systems if "heating" not in self.exclusions: self.heating_recommender.recommend(phase=phase) - if self.heating_recommender.recommendations: - if len(self.heating_recommender.recommendations) == 1: - property_recommendations.append(self.heating_recommender.recommendations) - else: - property_recommendations.extend(self.heating_recommender.recommendations) + if ( + self.heating_recommender.heating_recommendations or + self.heating_recommender.heating_control_recommendations + ): + if self.heating_recommender.heating_recommendations: + property_recommendations.append(self.heating_recommender.heating_recommendations) + + if self.heating_recommender.heating_control_recommendations: + property_recommendations.append(self.heating_recommender.heating_control_recommendations) + # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating controls) # otherwise we incremenet by 1 max_used_phase = max( - [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs] + [rec["phase"] for rec in + self.heating_recommender.heating_recommendations + + self.heating_recommender.heating_control_recommendations] ) amount_to_increment = max_used_phase - phase + 1 phase += amount_to_increment From 5a879572f46fba68fc136f2d0681805119e60ccb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 23 Apr 2024 15:34:29 +0100 Subject: [PATCH 248/248] final modifications for immo pilot --- etl/customers/immo/pilot/asset_list_2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py index 1b4fad9a..52260f57 100644 --- a/etl/customers/immo/pilot/asset_list_2.py +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -51,7 +51,9 @@ already_installed = [ 'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"] }, { - 'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"] + 'address': '44 Hatfield Road', + 'postcode': 'DY9 7LW', + "already_installed": ["loft_insulation", "cavity_wall_insulation"] } ]