From 980f439f49e2a1504099cd9204e79cbba328e951 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 19 Jun 2025 22:48:25 +0100 Subject: [PATCH] debugging calico epc search to handle the strict blocks --- asset_list/AssetList.py | 94 ++++++++++++++++------- asset_list/app.py | 4 - asset_list/hubspot/prepare_for_hubspot.py | 14 ++-- asset_list/utils.py | 8 +- backend/Funding.py | 93 ++++++++++++++++++---- backend/SearchEpc.py | 27 ++++++- backend/tests/test_funding.py | 52 +++++++++++++ 7 files changed, 236 insertions(+), 56 deletions(-) create mode 100644 backend/tests/test_funding.py diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index acca0c58..130d1242 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -298,7 +298,7 @@ class AssetList: "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", "Does the property have cladding?", "Gable Wall Obstructions", "Does the property have foliage that needs removal?", - "Potential unsafe environment", "Date of Inspection" + "Potential unsafe environment", "Date of Inspection", "Borescoped?" 
] NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" @@ -354,6 +354,7 @@ class AssetList: # Work type prefixes: # Empties EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" + EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002' EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" @@ -1280,7 +1281,8 @@ class AssetList: ) self.standardised_asset_list["SAP Category"] = np.where( - pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]), + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) & + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), "SAP Unknown", self.standardised_asset_list["SAP Category"] ) @@ -1745,8 +1747,6 @@ class AssetList: self.standardised_asset_list["solar_epc_loft_needs_topup"] ) - z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"] - self.standardised_asset_list["solar_eligible"] = ( # Property isn't a flag not_a_flat & @@ -2035,14 +2035,15 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = np.where( (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), - None, + self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)", self.standardised_asset_list["cavity_reason"] ) + # Split cavity_reason on the colon and check if the first part is equal to one of the two options above # that indicates empties self.standardised_asset_list["identified_empty_cavity"] = ( self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( - [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY] + [self.EMPTY_CAVITY_NON_INTRUSIVE, 
self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY] ) ) @@ -2078,6 +2079,7 @@ class AssetList: NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. expanded_rows = [] + for _, row in blocks.iterrows(): addr = str(row[self.STANDARD_ADDRESS_1]) @@ -2194,16 +2196,9 @@ class AssetList: # if we have any blocks, where work is eligible, we flag them now # These blocks may be refecence via the landlord_block_reference field, or by property types being # blocks of flats - has_landlord_block_reference = self.landlord_block_reference is not None + has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])) if has_landlord_block_reference: - - # # If we blocks of flats, without a landlord block reference, we create this - # self.fill_landlord_block_reference(has_blocks_of_flats) - # - # # If we have blocks of flats, we split these out into individual units - # self.split_blocks() - # For blocks that have a 50% allocation, we create project codes self.block_analysis() # find any block refs with more than 50% emptires @@ -2265,13 +2260,18 @@ class AssetList: block_analysis = [] for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 if all(cavity_breakdown.index == "No Eligibility"): continue # We check the % of empty vs not empty as right now, we're focused on empty - n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum() + n_empties = ( + (group["identified_empty_cavity"] == True) & + (~pd.isnull(group["cavity_reason"])) & + (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + ).sum() works = group["hubspot_status"] above_threshold = works.map(LABEL_TO_ENUM.get).dropna() @@ -2293,6 +2293,36 @@ class AssetList: block_analysis = block_analysis.fillna(0) # We flag which properties 
are eligible for works. We need at least 50% + block_analysis["Eligible for Works"] = ( + block_analysis["Percentage of Empties"] >= 0.50 + ) + block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False) + + # For properties that are NOT eligible, we should update the cavity reason + ineligible_blocks = block_analysis[ + ~block_analysis["Eligible for Works"] + ]["Block Reference"].values + + eligible_blocks = block_analysis[ + block_analysis["Eligible for Works"] + ]["Block Reference"].values + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks), + self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list["cavity_reason"] + ) + + # if the property is in a block of flats that eligible, but the property itself is not eligible, we flag this + # The criteria is: + # =The property should be in a block of flats + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), + self.standardised_asset_list["cavity_reason"] + + " " + "(Flat in block with more than 50% eligible, but not eligible itself)", + self.standardised_asset_list["cavity_reason"] + ) self.block_analysis_df = block_analysis @@ -2434,13 +2464,13 @@ class AssetList: ) # Format the two date columns - programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce") + programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce") programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], errors="coerce" ) # Convert to dd/mm/yyyy format - programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y") + programme_data["survey_date"] = 
programme_data["survey_date"].dt.strftime("%d/%m/%Y") programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") ) @@ -2457,12 +2487,14 @@ class AssetList: ready_to_be_scheduled = ( ( programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"])) + ) & (~pd.isnull(programme_data["survey_date"])) ) - completed_works = ( - programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) - programme_data = programme_data[ready_to_be_scheduled | completed_works] + # completed_works = ( + # (programme_data["hubspot_status"] != + # hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) & + # (~pd.isnull(programme_data["hubspot_status"])) + # ) + programme_data = programme_data[ready_to_be_scheduled] # Merge on the contact details programme_data = programme_data.merge( @@ -2505,11 +2537,13 @@ class AssetList: self.CRM_HISTORICAL_CAVITY_PRODUCT["name"] ) else: + # We shouldn't have any missing products programme_data = programme_data[ - ~pd.isnull(programme_data["domna_product"]) & - ~pd.isnull(programme_data["surveyor"]) & - ~pd.isnull(programme_data["survey_week"]) - ] + ~pd.isnull(programme_data["survey_date"]) + ] + + if pd.isnull(programme_data["domna_product"]).sum(): + raise ValueError("Missing products") programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( @@ -2686,7 +2720,7 @@ class AssetList: 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': "survey_week", + 'Expected Commencement Date ': "survey_date", 'Deal Name ': "dealname", # Need to create this, 'Product ID ': 'Product ID ', 'Name ': 'Name ', @@ -2724,7 
+2758,11 @@ class AssetList: # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing programme_data['Listing Owner Email '] = programme_data['Deal Owner'] programme_data['Amount '] = 0 - programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower() + programme_data["Deal Owner"] = np.where( + ~pd.isnull(programme_data["Deal Owner"]), + programme_data["Deal Owner"].astype(str).str.lower(), + programme_data["Deal Owner"] + ) # We make sure we have all of the columns that we need missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] diff --git a/asset_list/app.py b/asset_list/app.py index 08164c19..8158becc 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -553,13 +553,9 @@ def app(): ) asset_list.merge_data(epc_df) - asset_list.extract_attributes() - asset_list.identify_worktypes() - pprint(asset_list.work_type_figures) - # We now flag the status of the property asset_list.label_property_status() asset_list.analyse_geographies() diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index 9ffe24ca..0d0abcb2 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -19,19 +19,19 @@ def app(): # inputs: reconcile_programme = False # If True, the hubspot upload will include all properties with a project code - customer_domain = "https://sandwell.gov.uk" + customer_domain = "https://livewest.co.uk" installer_name = "J & J CRUMP" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - " - "Standardised.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised " + "V2.xlsx" ) - asset_list_sheet_name = "Proposed Program" - asset_list_header = 1 + asset_list_sheet_name = "Standardised Asset List" + asset_list_header = 0 
contact_details_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx" ) - contacts_sheet_name = "Sheet1" + contacts_sheet_name = "Contact Information" contacts_landlord_property_id = "landlord_property_id" contacts_phone_number_column = "phone_number" contacts_secondary_phone_number_column = "secondary_phone_number" diff --git a/asset_list/utils.py b/asset_list/utils.py index ff9db3f8..61dcf8ea 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -79,7 +79,13 @@ def get_data( uprn=uprn ) # Force the skipping of estimating the EPC - searcher.ordnance_survey_client.property_type = None + # We check if the property was split + if home["is_expended_block"]: + searcher.ordnance_survey_client.property_type = "Flat" + searcher.property_type = "Flat" + searcher.set_strict_property_type_search() + else: + searcher.ordnance_survey_client.property_type = None searcher.ordnance_survey_client.built_form = None searcher.find_property(skip_os=True) diff --git a/backend/Funding.py b/backend/Funding.py index 78440eac..49d2d293 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -5,7 +5,7 @@ from typing import List from backend.app.plan.schemas import HousingType -class Funding: +class FundingOld: """ Given a property, this class identifies if the home is possibly eligible for funding under the various funding schemes. 
It will also calculate the expected amount of funding available @@ -413,13 +413,32 @@ class Funding: self.whlg() -class Funding2: +class Funding: """ New class to handle funding calculation """ - def __init__(self, tenure: HousingType): + def __init__( + self, + tenure: HousingType, + social_cavity_abs_rate: float, + social_solid_abs_rate: float, + private_cavity_abs_rate: float, + private_solid_abs_rate: float, + project_scores_matrix, + whlg_eligible_postcodes + ): self.tenure = tenure + self.social_cavity_abs_rate = social_cavity_abs_rate + self.social_solid_abs_rate = social_solid_abs_rate + self.private_cavity_abs_rate = private_cavity_abs_rate + self.private_solid_abs_rate = private_solid_abs_rate + + self.starting_sap_band = None + self.ending_sap_band = None + self.floor_area_band = None + self.project_scores_matrix = project_scores_matrix + self.whlg_eligible_postcodes = whlg_eligible_postcodes @staticmethod def get_sap_band(sap_score_number): @@ -446,8 +465,22 @@ class Funding2: return None + @staticmethod + def get_floor_area_band(floor_area): + if floor_area <= 72: + return "0-72" + + if floor_area <= 97: + return "73-97" + + if floor_area <= 199: + return "98-199" + + return "200" + + @staticmethod def eco4_prs_eligibility( - self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str + starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str ): """ Handles the eligibility criteria for private rental properties under eco @@ -481,31 +514,53 @@ class Funding2: return False + def calculate_full_project_abs(self): + + # Filter the project scores matrix + data = self.project_scores_matrix[ + (self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) & + (self.project_scores_matrix["Starting Band"] == self.starting_sap_band) & + (self.project_scores_matrix["Finishing Band"] == self.ending_sap_band) + ] + + if data.empty: + raise ValueError("Missing abs rate, check 
the project scores matrix") + + return data["Cost Savings"].values[0] + def check_funding( self, measures: List, starting_sap: int, ending_sap: int, + floor_area: float, mainheat_description: str, - heating_control_description: str + heating_control_description: str, + is_cavity: bool ): """ Given a list of measures, this function will check if the package of measures is fundable :param measures: :param starting_sap: :param ending_sap: + :param floor_area: + :param mainheat_description: + :param heating_control_description: + :param is_cavity: Indicates if the property has cavity wall insulation :return: """ - starting_band = self.get_sap_band(starting_sap) - ending_band = self.get_sap_band(ending_sap) + # If it's an E or D, should get to an EPC C + if starting_sap >= 55 and ending_sap < 69: + raise NotImplementedError("This property doesn't have sufficient SAP movement") - # For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a - # D - - if starting_band <= 38 & ending_band >= 55: + if starting_sap <= 38 and ending_sap <= 55: # F or G should get to D raise NotImplementedError("Implement F or G to D eligibility") + self.starting_sap_band = self.get_sap_band(starting_sap) + self.ending_sap_band = self.get_sap_band(ending_sap) + self.floor_area_band = self.get_floor_area_band(floor_area) + ######################## # Private ######################## @@ -513,13 +568,25 @@ class Funding2: # 2) GBIS if self.tenure == "Private": - is_eligible = self.eco4_prs_eligibility( + is_eco4_eligible = self.eco4_prs_eligibility( starting_sap=starting_sap, measures=measures, mainheat_description=mainheat_description, heating_control_description=heating_control_description ) - pass + + # Need to implement + # 1) Package has to include an insulation measure + # 2) We should use the funding for the measure that has the largest partial project score + is_gbis_eligible = () + + if not is_eco4_eligible: + return + eco4_abs = 
self.calculate_full_project_abs() + # We estimate rates now + eco4_funding = ( + eco4_abs * self.private_cavity_abs_rate if is_cavity else eco4_abs * self.private_solid_abs_rate + ) ######################## # Social diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 1ee1f950..16dd8f04 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -160,6 +160,9 @@ class SearchEpc: """ Address lines 1 and postcode are mandatory fields. The other address lines are optional but can be used to find the epc for the home, if address1 and postcode are insufficient + + If you wish to run a strict property type search, please run set_strict_property_type_search() + :param address1: string, propery's address line 1 :param postcode: string, propery's postcode :param full_address: string, optional parameter, the full address of the property @@ -189,6 +192,7 @@ class SearchEpc: self.older_epcs = None self.full_sap_epc = None self.metadata = None + self.strict_property_type_search = False # These are the address and postcode values, which we store in the database self.address_clean = None @@ -199,6 +203,14 @@ class SearchEpc: self.property_type = property_type self.fast = fast + def set_strict_property_type_search(self): + """ + This method sets the strict property type search flag to True. When this flag is set, the search will + only return results that match the specified property type. 
+ :return: + """ + self.strict_property_type_search = True + @staticmethod def get_house_number(address: str, postcode=None) -> str | None: """ @@ -315,6 +327,8 @@ class SearchEpc: address_params["address"] = self.address1 if self.postcode: address_params["postcode"] = self.postcode + if self.strict_property_type_search and self.property_type: + address_params["property-type"] = self.property_type.lower() # We attempt the search with uprn params @@ -365,11 +379,16 @@ class SearchEpc: unique_property_types = {r["property-type"] for r in rows} + is_just_a_house = (len(unique_property_types) == 1) & ( + ("House" in unique_property_types) | ("Bungalow" in unique_property_types) + ) + # We allow for variation in property type across flats/maisonettes # If we know that we have a flat/maisonette, we allow for both property types - if property_type in ["Flat", "Maisonette"]: - if ((len(uprns) == 1) and ((len(unique_property_types) == 1) - ) or unique_property_types == {"Flat", "Maisonette"}): + # Make sure we have not JUST a house, or not JUST a flat/maisonette + if property_type in ["Flat", "Maisonette"] and not is_just_a_house: + if (((len(uprns) == 1) and ((len(unique_property_types) == 1) + ) or unique_property_types == {"Flat", "Maisonette"})): return rows if property_type is not None: @@ -424,6 +443,8 @@ class SearchEpc: return rows + raise ValueError("property type and address cannot both be None, at least one must be provided") + @staticmethod def format_address(newest_epc): """ diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py new file mode 100644 index 00000000..311ab589 --- /dev/null +++ b/backend/tests/test_funding.py @@ -0,0 +1,52 @@ +import pytest +import pandas as pd +from utils.s3 import read_csv_from_s3 +from backend.Funding import Funding + + +def get_funding_data(): + """ + This function retrieves the eco project scores matrix and the warm homes local grant funding data + :return: + """ + project_scores_matrix = read_csv_from_s3( 
+ bucket_name="retrofit-data-dev", + filepath="funding/ECO4 Full Project Scores Matrix.csv", + ) + project_scores_matrix = pd.DataFrame(project_scores_matrix) + project_scores_matrix.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings'] + project_scores_matrix["Cost Savings"] = project_scores_matrix["Cost Savings"].astype(float) + + whlg_eligible_postcodes = read_csv_from_s3( + bucket_name="retrofit-data-dev", + filepath="funding/whlg eligible postcodes.csv", + ) + whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + + return project_scores_matrix, whlg_eligible_postcodes + + +class TestFunding: + + def test_prs(self): + eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() + funding = Funding( + project_scores_matrix=eco_project_scores_matrix, + whlg_eligible_postcodes=whlg_eligible_postcodes, + social_cavity_abs_rate=13.5, + social_solid_abs_rate=17, + private_cavity_abs_rate=13.5, + private_solid_abs_rate=17, + tenure="Private", + ) + + measures_1 = ["internal_wall_insulation", "solar_pv"] + funding.check_funding( + measures=measures_1, + starting_sap=54, + ending_sap=69, + floor_area=73, + mainheat_description="Boiler and radiators, mains gas", + heating_control_description="Programmer, room thermostat and TRVs", + is_cavity=True + )