diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 4b7a11ec..ad3087c3 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -4,8 +4,8 @@ import re import tiktoken from pprint import pprint from datetime import datetime +import asset_list.hubspot.config as hubspot_config -from numpy.ma.core import masked_not_equal from openai import OpenAI import numpy as np import pandas as pd @@ -29,6 +29,7 @@ from recommendations.recommendation_utils import ( ) from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes +from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes logger = setup_logger() @@ -279,6 +280,7 @@ class AssetList: STANDARD_HEATING_SYSTEM = "landlord_heating_system" STANDARD_EXISTING_PV = "landlord_existing_pv" STANDARD_SAP = "landlord_sap_rating" + STANDARD_BLOCK_REFERENCE = "landlord_block_reference" DOMNA_PROPERTY_ID = "domna_property_id" @@ -292,6 +294,13 @@ class AssetList: "Any further surveyor notes", 'Surveyors Name' ] + NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [ + "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", + "Does the property have cladding?", "Gable Wall Obstructions", + "Does the property have foliage that needs removal?", + "Potential unsafe environment", "Date of Inspection", "Borescoped?" 
+ ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -342,6 +351,40 @@ class AssetList: "cavity wall, as built, partial insulation", ] + # Work type prefixes: + # Empties + EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" + EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002' + EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" + EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" + EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" + EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build" + EPC_EMPTY = "EPC Shows Empty Cavity" + LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " + "Non-cavity") + # Extraction + EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction" + + # Solar + SOLAR_ELIGIBLE = "Solar Eligible" + SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade" + + CRM_HISTORICAL_CAVITY_PRODUCT = { + "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity" + } + + CRM_PRODUCTS = { + "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"}, + "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"}, + "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"}, + "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"}, + "Solar PV + Heating Upgrade - ECO4": { + "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4" + }, + "Historical ECO Cavity": 
CRM_HISTORICAL_CAVITY_PRODUCT + } + def __init__( self, local_filepath, @@ -362,6 +405,7 @@ class AssetList: landlord_heating_system=None, landlord_existing_pv=None, landlord_sap=None, + landlord_block_reference=None, phase=False, header=0 ): @@ -375,7 +419,7 @@ class AssetList: self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} - self.flat_data = None + self.block_analysis_df = None self.duplicated_addresses = None self.contact_details = None self.contact_detail_fields = None @@ -386,6 +430,7 @@ class AssetList: self.unmatched_submissions = pd.DataFrame() self.ecosurv = None self.ecosurv_no_match = pd.DataFrame() + self.geographical_areas = pd.DataFrame() # When this is True, we intend to break the programme into multiple phases. We may need to review # how this is structured in the future, as depending on how we get future data, we may need to @@ -400,6 +445,10 @@ class AssetList: self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + self.new_format_non_insturives_present = ( + "Has the property been re-walled?" 
in self.raw_asset_list.columns + ) + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -414,6 +463,7 @@ class AssetList: self.landlord_heating_system = landlord_heating_system self.landlord_existing_pv = landlord_existing_pv self.landlord_sap = landlord_sap + self.landlord_block_reference = landlord_block_reference # parameters for cleaning self.full_address_cols_to_concat = full_address_cols_to_concat @@ -479,6 +529,23 @@ class AssetList: self.standardised_asset_list["Archetype"].copy() ) + self.prefixes_to_products = { + # Empty + self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + # Extraction + self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], + # Solar + self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], + } + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -660,6 +727,7 @@ class AssetList: self.landlord_heating_system, self.landlord_existing_pv, self.landlord_sap, + self.landlord_block_reference, ] # Keep just non-null variables (e.g landlord may not provide uprn self.keep_variables = [v for v in variables if v is not None] @@ -677,6 +745,7 @@ class AssetList: 
self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, self.landlord_existing_pv: self.STANDARD_EXISTING_PV, self.landlord_sap: self.STANDARD_SAP, + self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} @@ -687,6 +756,9 @@ class AssetList: if self.non_intrusives_eligibility: non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) + if self.new_format_non_insturives_present: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -920,7 +992,7 @@ class AssetList: self.STANDARD_YEAR_BUILT, self.STANDARD_WALL_CONSTRUCTION, self.STANDARD_HEATING_SYSTEM, - self.STANDARD_EXISTING_PV + self.STANDARD_BLOCK_REFERENCE, ] if v not in self.standardised_asset_list.columns ] for v in missing_variables: @@ -931,6 +1003,38 @@ class AssetList: self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str) ) + # CLean up the standard SAP column, that can be problematic + if self.landlord_sap is not None: + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP] + .astype(str) + .str.replace('\xa0', ' ', regex=False) + .str.strip() + ) + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == "", + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP].astype(float) + ) + # If it's zero, we set it to None + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == 0, + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) + + has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum() + + # 
Perform block splitting, ahead of fetching the EPC data + # If we blocks of flats, without a landlord block reference, we create this + self.fill_landlord_block_reference(has_blocks_of_flats) + + # If we have blocks of flats, we split these out into individual units. + self.split_blocks() + def merge_data(self, df: pd.DataFrame): """ Used to insert data into the standardised asset list, based on the domna property id @@ -1147,7 +1251,7 @@ class AssetList: processed_age_band, how="left" ) - def identify_worktypes(self, cleaned): + def identify_worktypes(self): if self.landlord_sap is not None: # We add a SAP category for all work type identification @@ -1176,6 +1280,13 @@ class AssetList: ) ) + self.standardised_asset_list["SAP Category"] = np.where( + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) & + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + "SAP Unknown", + self.standardised_asset_list["SAP Category"] + ) + else: # We add a SAP category for all work type identification # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more) @@ -1196,6 +1307,11 @@ class AssetList: ), ) ) + self.standardised_asset_list["SAP Category"] = np.where( + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + "SAP Unknown", + self.standardised_asset_list["SAP Category"] + ) # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs @@ -1426,13 +1542,22 @@ class AssetList: ) ) + # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the + # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the + # landlord data suggests otherwise (e.g. 
there's a gas boiler), we default to what the landlord has told us self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters|room heaters" + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( + "electric storage heaters|room heaters" + ) & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] != "Controls for high heat retention storage heaters" + ) ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] - ] != "Controls for high heat retention storage heaters" + ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["district heating", "communal heating", "communal gas boiler"] + ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ") ) ) @@ -1501,19 +1626,9 @@ class AssetList: else: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False - # We merge on the u-value for average thermal transmittance - walls_uvalue_data = pd.DataFrame(cleaned["walls-description"]) - walls_uvalue_data = walls_uvalue_data[ - ~pd.isnull(walls_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["walls-description"], - "thermal_transmittance": "walls_u_value" - } - ) - self.standardised_asset_list = self.standardised_asset_list.merge( - walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"] - ) + self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None) self.standardised_asset_list["solar_epc_walls_insulated"] = ( ( @@ -1526,16 +1641,20 
@@ class AssetList: ) ) - # We merge on the u-value for average thermal transmittance - roof_data = pd.DataFrame(cleaned["roof-description"])[ - ["original_description", "thermal_transmittance", "is_pitched", "is_loft"] - ].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["roof-description"], - "thermal_transmittance": "roof_u_value", - } - ) - + roof_data = [] + for desc in self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["roof-description"] + ].unique(): + if pd.isnull(desc): + continue + roof_data.append( + { + self.EPC_API_DATA_NAMES["roof-description"]: desc, + **RoofAttributes(desc).process() + } + ) + roof_data = pd.DataFrame(roof_data) + roof_data = roof_data.rename(columns={"thermal_transmittance": "roof_u_value"}) self.standardised_asset_list = self.standardised_asset_list.merge( roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"] ) @@ -1683,10 +1802,10 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = None empty_cavity_map = { - "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ", - "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property " + "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ", + "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " "already has solar: ", - "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, " + "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", } @@ -1711,7 +1830,7 @@ class AssetList: )) & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], 
self.standardised_asset_list["cavity_reason"] ) @@ -1723,7 +1842,7 @@ class AssetList: self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show filled or other: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1735,7 +1854,7 @@ class AssetList: (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1747,8 +1866,7 @@ class AssetList: (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list[ - "SAP Category"], + f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) else: @@ -1758,7 +1876,7 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity: " + self.standardised_asset_list["SAP Category"], + f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1768,10 +1886,12 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[ - "SAP Category"], + 
f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) + + # Work type prefixes + # Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and # inspections show filled self.standardised_asset_list["cavity_reason"] = np.where( @@ -1781,7 +1901,7 @@ class AssetList: ~self.standardised_asset_list["epc_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " + + f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1792,7 +1912,7 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"], + f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1801,7 +1921,7 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + + f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1814,11 +1934,9 @@ class AssetList: # Map of variables and fill values for the solar_reason variable # ordering of this map is important, where we flag our prioritised work types first solar_reason_map = { - "solar_eligible": "Solar Eligible: ", - "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ", - 
"solar_eligible_needs_heating_upgrade": ( - "Solar Eligible, Needs Heating Upgrade: " - ) + "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ", + "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ", + "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: " } for variable, reason in solar_reason_map.items(): @@ -1864,17 +1982,18 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["submission_date"])) + (~pd.isnull(self.standardised_asset_list["submission_status"])) ), None, self.standardised_asset_list[col] ) - if self.ecosurv is not None: + if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns: + # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_reference"])) + (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"])) ), None, self.standardised_asset_list[col] @@ -1911,42 +2030,301 @@ class AssetList: self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work) ] - def flat_analysis(self): - - # We need to deduce the building name - we strip out the house number - - # We want to deduce if flats have 50% of the properties below C75 - # We group by postcode and property type - grouped = self.standardised_asset_list.groupby( - [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE] + # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of + # 76 or above, we should exclude it because it's likely not going to be eligible for anyting + self.standardised_asset_list["cavity_reason"] = np.where( + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & + (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 
or more"), + self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)", + self.standardised_asset_list["cavity_reason"] ) - flat_data = [] - for _, group in grouped: - if "flat" in group[self.STANDARD_PROPERTY_TYPE].values: - num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0] - num_below_c75 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum() - # Check if any flats are below C69 - num_flats_below_c69 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(69).sum() + # Split cavity_reason on the colon and check if the first part is equal to one of the two options above + # that indicates empties + self.standardised_asset_list["identified_empty_cavity"] = ( + self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( + [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY] + ) + ) - flat_data.append( - { - "Postcode": group[self.STANDARD_POSTCODE].iloc[0], - "Property Type": "Flat", - "Number of Flats with EPC": num_flats, - "Number of Flats below C75": num_below_c75, - "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats), - "Number of Flats Below C69": num_flats_below_c69, - } + def fill_landlord_block_reference(self, has_blocks_of_flats): + if not has_blocks_of_flats: + return + + # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & ( + pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) + ), + self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " + + self.standardised_asset_list[self.STANDARD_POSTCODE], + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ) + + def split_blocks(self): + """ + Where we have a single row that is a block of flats, we split this into 
multiple rows, + one for each unit. The data that we have will be copied across rows + :return: + """ + + blocks = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + ].copy() + + if blocks.empty: + return + + RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') + NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. + + expanded_rows = [] + + for _, row in blocks.iterrows(): + addr = str(row[self.STANDARD_ADDRESS_1]) + + # 1 ─ Range (e.g. 1-7) + m_range = RANGE_RE.search(addr) + if m_range: + start, end = m_range.groups() + start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) + if start > end or (end - start) > 100: + raise ValueError(f"Suspicious range '{addr}'") + for n in range(start, end + 1): + new = row.copy() + new_addr = RANGE_RE.sub(str(n), addr, count=1) + original_full_address = new[self.STANDARD_FULL_ADDRESS] + new_full_address = original_full_address.replace(addr, new_addr) + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.STANDARD_FULL_ADDRESS] = new_full_address + new[self.STANDARD_PROPERTY_TYPE] = "flat" + # Keep a record of the previous address 1 + new["block_address1"] = addr + new["block_full_address"] = original_full_address + new["is_expended_block"] = True + # We update the full address + + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 2 ─ Explicit list (e.g. 
1, 2, 5 Block) + nums = NUM_RE.findall(addr) + if len(nums) > 1 and ',' in addr: + for n in nums: + new = row.copy() + new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 3 ─ Single number or no number, treat as individual dwelling + if (len(nums) == 1) or not nums: + expanded_rows.append(row) + continue + + # Anything else with digits is unrecognised + raise NotImplementedError(f"Unhandled block format: '{addr}'") + + expanded_blocks = pd.DataFrame(expanded_rows) + + # We drop the blocks from the standardised asset list and append on the expanded blocks + self.standardised_asset_list = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" + ] + + self.standardised_asset_list = pd.concat( + [self.standardised_asset_list, expanded_blocks], + ignore_index=True + ) + + # As a final clean up, for any blocks that are size 1, we don't includr a project code + sizes = ( + expanded_blocks + .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID] + .nunique() + .reset_index() + ) + size_1 = sizes[sizes[self.DOMNA_PROPERTY_ID] <= 1] + # Remove the size 1 blocks from the standardised asset list + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + size_1[self.STANDARD_BLOCK_REFERENCE].values + ), + None, + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ) + + def label_property_status(self): + """ + This function is designed to be run after identify_worktypes() has been run, and will create a "property_status" + column, which will note where each property is (to be surveyed, surveyed, installed), using the stages we + recognise within hubspot + :return: + """ + + # For anything that is ready to go, that gets set to ready to be 
scheduled + self.standardised_asset_list["hubspot_status"] = np.where( + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | + ~pd.isnull(self.standardised_asset_list["solar_reason"]), + hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label, + None + ) + + # we step through the process of flagging completed surveys + + # We utilise submissions, ecosurv and outcomes to define the hubspot status + # We'll take the maximum of these three columns, based on the enum integer value + label_to_enum = {e.label: e for e in hubspot_config.HubspotProcessStatus} + + def get_max_status_from_columns(row): + status_candidates = [] + for col in ["submission_status", "ecosurv_install_status", "outcome_status"]: + label = row.get(col) + if label in label_to_enum: + status_candidates.append(label_to_enum[label]) + if not status_candidates: + return row["hubspot_status"] # fallback to existing status if no updates + return max(status_candidates).label + + self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply( + get_max_status_from_columns, axis=1 + ) + + self.standardised_asset_list["project_code"] = None + # if we have any blocks, where work is eligible, we flag them now + # These blocks may be refecence via the landlord_block_reference field, or by property types being + # blocks of flats + has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])) + + if has_landlord_block_reference: + # For blocks that have a 50% allocation, we create project codes + self.block_analysis() + # find any block refs with more than 50% emptires + viable_empty_blocks = self.block_analysis_df[ + self.block_analysis_df['Percentage of Empties'] >= 0.50 + ] + + if not viable_empty_blocks.empty: + project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() + self.standardised_asset_list = self.standardised_asset_list.merge( + project_code_lookup, how="left", left_on=self.STANDARD_BLOCK_REFERENCE, 
right_on="Block Reference" ) + self.standardised_asset_list["project_code"] = np.where( + ~pd.isnull(self.standardised_asset_list["Block Reference"]), + self.standardised_asset_list["Block Reference"], + self.standardised_asset_list["project_code"] + ) + self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"]) - flat_data = pd.DataFrame(flat_data) + def analyse_geographies(self): + cavity_programme = ( + self.standardised_asset_list[["domna_postcode", "cavity_reason"]] + .groupby(["domna_postcode"])["cavity_reason"] + .count() + .reset_index() + ) + solar_programme = ( + self.standardised_asset_list[["domna_postcode", "solar_reason"]] + .groupby(["domna_postcode"])["solar_reason"] + .count() + .reset_index() + ) + postcodes = ( + self.standardised_asset_list[["domna_postcode", "landlord_property_id"]] + .groupby("domna_postcode")["landlord_property_id"] + .count() + .reset_index() + .rename(columns={"landlord_property_id": "n_properties"}) + ) + geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge( + solar_programme, how="left", on="domna_postcode" + ).fillna(0) + geographical_areas["coverage"] = ( + ( + geographical_areas["solar_reason"] + geographical_areas["cavity_reason"] + ) / geographical_areas["n_properties"] * 100 + ) - self.flat_data = flat_data + geographical_areas = geographical_areas.sort_values("coverage", ascending=False) + self.geographical_areas = geographical_areas + + def block_analysis(self): + + # Reverse mapping: label -> enum + LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} + + # Threshold status - anything that is at this stage or beyond is considered surveyed + threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value + + block_analysis = [] + for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + + cavity_breakdown = group["cavity_reason"].fillna("No 
Eligibility").value_counts(normalize=True) * 100 + + if all(cavity_breakdown.index == "No Eligibility"): + continue + + # We check the % of empty vs not empty as right now, we're focused on empty + n_empties = ( + (group["identified_empty_cavity"] == True) & + (~pd.isnull(group["cavity_reason"])) & + (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + ).sum() + + works = group["hubspot_status"] + above_threshold = works.map(LABEL_TO_ENUM.get).dropna() + count_above = (above_threshold >= threshold).sum() + proportion_surveyed = count_above / len(works) + proportion_empty = n_empties / len(works) + # We auto-populate any blocks that have greater than 50% proportion empty + + block_analysis.append( + { + "Block Reference": block_reference, + "Proportion of properties suryeyed": proportion_surveyed, + "Percentage of Empties": proportion_empty, + **cavity_breakdown.to_dict(), + } + ) + + block_analysis = pd.DataFrame(block_analysis) + block_analysis = block_analysis.fillna(0) + + # We flag which properties are eligible for works. 
We need at least 50% + block_analysis["Eligible for Works"] = ( + block_analysis["Percentage of Empties"] >= 0.50 + ) + block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False) + + # For properties that are NOT eligible, we should update the cavity reason + ineligible_blocks = block_analysis[ + ~block_analysis["Eligible for Works"] + ]["Block Reference"].values + + eligible_blocks = block_analysis[ + block_analysis["Eligible for Works"] + ]["Block Reference"].values + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks), + self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list["cavity_reason"] + ) + + # if the property is in a block of flats that eligible, but the property itself is not eligible, we flag this + # The criteria is: + # =The property should be in a block of flats + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), + self.standardised_asset_list["cavity_reason"] + + " " + "(Flat in block with more than 50% eligible, but not eligible itself)", + self.standardised_asset_list["cavity_reason"] + ) + + self.block_analysis_df = block_analysis @staticmethod def split_full_name(x): @@ -1970,6 +2348,8 @@ class AssetList: sheet_name, landlord_property_id, phone_number_column=None, + secondary_phone_number_column=None, + secondary_contact_full_name=None, email_column=None, fullname_column=None, firstname_column=None, @@ -1979,6 +2359,8 @@ class AssetList: self.contact_detail_fields = { "landlord_property_id": landlord_property_id, "phone_number": phone_number_column, + "secondary_phone_number": secondary_phone_number_column, + "secondary_contact_full_name": secondary_contact_full_name, "email": email_column, "fullname": fullname_column, "firstname": firstname_column, @@ -1986,7 
+2368,8 @@ class AssetList: } details_colnames = [ - phone_number_column, email_column, fullname_column, firstname_column, lastname_column + phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column, + lastname_column ] # We'll fill them none_details = [x for x in details_colnames if x is None] @@ -2007,68 +2390,113 @@ class AssetList: *contact_details[fullname_column].apply(self.split_full_name) ) else: - raise NotImplementedError("Implement me") + contact_details["title"] = None self.contact_details = contact_details - def prepare_for_crm(self, company_domain, crm_pipeline_name, first_dealstage, assigned_surveyors): + @classmethod + def load_standardised_asset_list(cls, filepath, sheet_name, header): """ - This function prepares the data for upload into Hubspot + This function is designed to load the standardised asset list from a file :return: """ # This is a placeholder for now + # instantiate the class + instance = cls( + local_filepath=filepath, + sheet_name=sheet_name, + address1_colname=cls.STANDARD_ADDRESS_1, + postcode_colname=cls.STANDARD_POSTCODE, + full_address_colname=cls.STANDARD_FULL_ADDRESS, + landlord_property_id=cls.STANDARD_LANDLORD_PROPERTY_ID, + full_address_cols_to_concat=[], + missing_postcodes_method=None, + address1_extraction_method=None, + landlord_year_built=cls.STANDARD_YEAR_BUILT, + landlord_uprn=cls.STANDARD_UPRN, + landlord_property_type=cls.STANDARD_PROPERTY_TYPE, + landlord_built_form=cls.STANDARD_BUILT_FORM, + landlord_wall_construction=cls.STANDARD_WALL_CONSTRUCTION, + landlord_roof_construction=cls.STANDARD_ROOF_CONSTRUCTION, + landlord_heating_system=cls.STANDARD_HEATING_SYSTEM, + landlord_existing_pv=cls.STANDARD_EXISTING_PV, + landlord_sap=cls.STANDARD_SAP, + landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE, + phase=False, + header=header + ) + return instance + def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False): + """ + This function prepares 
the data for upload into Hubspot + :param company_domain: The company domain name to be used in the CRM + :param installer_name: The name of the installer to be used in the CRM + :param reconcile_programme: If True, will include all properties with a project code, regardless of status + :raises ValueError: If the installer name is not valid or if there are missing products + :return: + """ # This maps the opportunities as we reference them, to the product data as stored in Hubspot - product_lookup_table = { - "Non-Intrusive Data Showed Cavity Extraction": { - "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500 - }, - "Non-Intrusive Data Showed Empty Cavity": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": { - "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500 - }, - "EPC Data Showed Empty Cavity": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Solid Floor, Insulated, No Solar": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Solid Floor, Insulated, Needs Loft": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Other Floor, Insulated, No Solar": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Other Floor, Insulated, Needs Loft": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - } - } + if not hubspot_config.Installer.is_valid_value(installer_name): + raise ValueError(f"Installer name {installer_name} is not valid. 
Please check the installer name.") + # We check if all products are covered in the lookup table - cavity_products = self.standardised_asset_list["cavity_reason"].unique() - solar_products = self.standardised_asset_list["solar_reason"].unique() - # Check if there any options not in out lookup table - if ( - any(x for x in cavity_products if x not in product_lookup_table) or - any(x for x in solar_products if x not in product_lookup_table) - ): - raise ValueError("We have products not referenced in the lookup table - check this") + cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() + cavity_products = [x for x in cavity_products if not pd.isnull(x)] + solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() + solar_products = [x for x in solar_products if not pd.isnull(x)] + + product_map = {} + for identified_product in cavity_products + solar_products: + if pd.isnull(identified_product): + continue + + matched_product = None + for product_prefix, crm_product in self.prefixes_to_products.items(): + if identified_product.startswith(product_prefix): + matched_product = crm_product + + product_map[identified_product] = matched_product + + # For each cavity and solar product, we iterate through the prexies and map to the products programme_data = self.standardised_asset_list.copy() + programme_data["domna_full_address"] = ( + programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace(" ", "") + ) - # Exclusions - these are properties we won't treat for the moment - product_exclusions = [ - "Other Floor, Insulated, No Solar", - "Other Floor, Insulated, Needs Loft" - ] - if product_exclusions: - logger.warning("Excluding products: %s", product_exclusions) + # Format the two date columns + programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce") + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( + 
programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], + errors="coerce" + ) + # Convert to dd/mm/yyyy format + programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y") + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") + ) - programme_data = programme_data[programme_data["solar_reason"].isin(product_exclusions) == False] + # We take rows that have a survyor and a date for the survey + # We include properties under 2 circumstances: + # 1) The hubspot status is ready to be scheduled and there is an assigned surveyor and week for survey + # 2) The hubspot status is something else, meaning this has been included in an existing programme + # 3) reconcile programme is true, and therefore all proeprties with a project code will be included + + if reconcile_programme: + programme_data = programme_data[~pd.isnull(programme_data["project_code"])] + else: + ready_to_be_scheduled = ( + ( + programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label + ) & (~pd.isnull(programme_data["survey_date"])) + ) + # completed_works = ( + # (programme_data["hubspot_status"] != + # hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) & + # (~pd.isnull(programme_data["hubspot_status"])) + # ) + programme_data = programme_data[ready_to_be_scheduled] # Merge on the contact details programme_data = programme_data.merge( @@ -2081,26 +2509,47 @@ class AssetList: programme_data["Company Domain Name "] = company_domain # Append the product data onto the programme data programme_data["cavity_product"] = programme_data["cavity_reason"].map( - lambda x: product_lookup_table.get(x, {"name": None})["name"] + lambda x: product_map.get(x, {"name": None})["name"] ) programme_data["solar_product"] = programme_data["solar_reason"].map( - lambda x: product_lookup_table.get(x, {"name": None})["name"] + lambda x: 
product_map.get(x, {"name": None})["name"] ) - programme_data["domna_product"] = programme_data["solar_reason"].copy() + # We check if we have any missings + cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum() + solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum() + + if cavity_missing > 0 or solar_missing > 0: + raise ValueError( + f"We have {cavity_missing} cavity products and {solar_missing} solar products that are not " + "mapped to a product in the lookup table. Please check the mapping." + ) + + programme_data["domna_product"] = programme_data["solar_product"].copy() programme_data["domna_product"] = np.where( pd.isnull(programme_data["domna_product"]), - programme_data["solar_product"], + programme_data["cavity_product"], programme_data["domna_product"] ) # We filter just on rows where we have a product - programme_data = programme_data[ - ~pd.isnull(programme_data["domna_product"]) - ] - programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) + if reconcile_programme: + # We include historical works, which will include hisorical cavity so we set these as extraction (as + # this is the main work mix) + programme_data["domna_product"] = programme_data["domna_product"].fillna( + self.CRM_HISTORICAL_CAVITY_PRODUCT["name"] + ) + else: + # We shouldn't have any missing products + programme_data = programme_data[ + ~pd.isnull(programme_data["survey_date"]) + ] + + if pd.isnull(programme_data["domna_product"]).sum(): + raise ValueError("Missing products") + programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( - pd.DataFrame(product_lookup_table).T[["name", "id", "unit_price"]] + pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] .reset_index() .rename( columns={ @@ -2115,28 +2564,98 @@ class AssetList: product_df['Quantity '] = 1 # Append on the product data - 
programme_data = programme_data.merge( - product_df, - how="left", - on="domna_product", - ) + programme_data = programme_data.merge(product_df, how="left", on="domna_product") # Add in deal and pipeline information - programme_data["dealname"] = programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data[ - "domna_product"] - programme_data['Pipeline '] = crm_pipeline_name - programme_data['Deal Stage '] = first_dealstage + programme_data["dealname"] = ( + programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"] + ) + programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME programme_data['Associations: Listing'] = "Property Owner" - programme_data = programme_data.merge( - assigned_surveyors.rename( - columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID} - ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID + # We determine which column we should use for the UPRN + if self.STANDARD_UPRN not in programme_data.columns: + uprn_column = self.EPC_API_DATA_NAMES["uprn"] + # If we're working form the EPC, we don't have this information if the EPC is estimated + programme_data[uprn_column] = np.where( + programme_data["estimated"] == True, None, programme_data[uprn_column] + ) + else: + # Use the value that has the most coverage + uprn_column = "hubspot_uprn" + programme_data[uprn_column] = programme_data[self.STANDARD_UPRN].fillna( + programme_data[self.EPC_API_DATA_NAMES["uprn"]] + ) + + # Add in some columns if we have them + date_of_inspections = ( + "Non-Intrusives: Date of Inspection" if + "Non-Intrusives: Date of Inspection" in programme_data.columns else None ) + # Ammend the property type and built form columns + programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy() + programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy() + + def _replace_property_description_data(programme_data, column_name): + """ + Helper function to replace 
property type or built form data with a specified value. + """ + + if column_name == "hubspot_property_type": + valid_values = ["house", "bungalow", "flat", "maisonette"] + epc_fill_col = "property-type" + elif column_name == "hubspot_built_form": + valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"] + epc_fill_col = "built-form" + else: + raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " + f"'hubspot_built_form'.") + + # Any vakue that is not house, bungalow, flat or maisonette is set to None + programme_data[column_name] = np.where( + ~programme_data[column_name].isin(valid_values), + None, + programme_data[column_name] + ) + # We fill with the EPC property type + programme_data[column_name] = np.where( + pd.isnull(programme_data[column_name]), + programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]], + programme_data[column_name] + ) + + programme_data[column_name] = programme_data[column_name].fillna("unknown") + + return programme_data + + # Clean up the property type and built form columns + programme_data = _replace_property_description_data(programme_data, "hubspot_property_type") + programme_data = _replace_property_description_data(programme_data, "hubspot_built_form") + + # We accomodate the old vs new inspections format + if "non-intrusives: WFT Findings" in programme_data.columns: + # We have the old format - we only have notes + non_intrusives_surveyor_notes = "non-intrusives: WFT Findings" + non_intrusives_construction = None + non_intrusives_insulated = None + non_intrusives_insulation_material = None + non_intrusives_ciga_check_required = None + non_intrusives_pv_access = None + non_intrusives_roof_orientation = None + non_intrusives_surveyor_name = None + else: + non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes' + non_intrusives_construction = "non-intrusives: Construction" + non_intrusives_insulated = "non-intrusives: Insulated" + 
non_intrusives_insulation_material = "non-intrusives: Material" + non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required' + non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' + non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION' + non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name' + # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged schema_mappings = { - 'Name ': self.DOMNA_PROPERTY_ID, # TODO: Maybe change this? 'Company Domain Name ': 'Company Domain Name ', 'Email ': ( self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None @@ -2150,49 +2669,42 @@ class AssetList: 'Phone ': ( self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None ), # TODO: Review + 'Secondary Phone ': ( + self.contact_detail_fields["secondary_phone_number"] if + self.contact_detail_fields["secondary_phone_number"] else None + ), + "Secondary Contact Full Name ": ( + self.contact_detail_fields["secondary_contact_full_name"] if + self.contact_detail_fields["secondary_contact_full_name"] else None + ), 'Full Address ': self.STANDARD_FULL_ADDRESS, 'Address 1 ': self.STANDARD_ADDRESS_1, 'Address 2 ': None, # TODO: Don't have this for the moment 'Postcode ': self.STANDARD_POSTCODE, - 'Property Type ': self.STANDARD_PROPERTY_TYPE, - 'Property Sub Type ': None, # TODO: Don't have this for the moment + 'Property Type ': "hubspot_property_type", + 'Property Sub Type ': "hubspot_built_form", 'Bedroom(s) ': None, # TODO: Don't have this for the moment 'Domna Property ID ': self.DOMNA_PROPERTY_ID, - 'National UPRN ': ( - self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"] - ), + # We populate this with the column that we have + 'National UPRN ': uprn_column, 'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID, 'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION, 
'Heating System ': self.STANDARD_HEATING_SYSTEM, 'Year Built ': self.STANDARD_YEAR_BUILT, 'Boiler Make ': None, # TODO: Don't have this for the moment 'Boiler Model ': None, # TODO: Don't have this for the moment - 'Non-Intrusives: Date Checked ': None, - # TODO: Don't have this for the moment - 'Non-Intrusives: Wall Type ': ( - "non-intrusives: Construction" if self.non_intrusives_present else None - ), - 'Non-intrusives: Insulation ': ( - "non-intrusives: Insulated" if self.non_intrusives_present else None - ), - 'Non-intrusives: Insulation Material ': ( - "non-intrusives: Material" if self.non_intrusives_present else None - ), - 'Non-Intrusives: CIGA Check Required ': ( - 'non-intrusives: CIGA Check Required' if self.non_intrusives_present else None - ), - 'Non-Intrusives: PV Access Issues ': ( - 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Roof Orientation ': ( - 'non-intrusives: OFF GAS - ROOF ORIENTATION' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Surveyor Notes ': ( - 'non-intrusives: Any further surveyor notes' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Surveyor Name ': ( - 'non-intrusives: Surveyors Name' if self.non_intrusives_present else None - ), + 'Non-Intrusives: Date Checked ': date_of_inspections, + 'Non-Intrusives: Wall Type ': non_intrusives_construction, + 'Non-intrusives: Insulation ': non_intrusives_insulated, + 'Non-intrusives: Insulation Material ': + non_intrusives_insulation_material, + 'Non-Intrusives: CIGA Check Required ': + non_intrusives_ciga_check_required, + 'Non-Intrusives: PV Access Issues ': non_intrusives_pv_access, + 'Non-Intrusives: Roof Orientation ': + non_intrusives_roof_orientation, + 'Non-Intrusives: Surveyor Notes ': non_intrusives_surveyor_notes, + 'Non-Intrusives: Surveyor Name ': non_intrusives_surveyor_name, 'CIGA: Date Requested ': None, # TODO: Don't have this for the moment 'CIGA: Cavity Guarantee Found ': 
None, 'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"], @@ -2209,18 +2721,24 @@ class AssetList: 'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"], 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], - 'Deal Stage ': 'Deal Stage ', 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': None, # TODO: Need to set this, + 'Expected Commencement Date ': "survey_date", 'Deal Name ': "dealname", # Need to create this, 'Product ID ': 'Product ID ', 'Name ': 'Name ', 'Unit price ': 'Unit price ', 'Quantity ': 'Quantity ', - 'Deal Owner': 'surveyor_email', - 'Amount ': 'Unit price ', + 'Deal Owner': 'surveyor', + 'Project Code ': 'project_code', + 'Associations: Listing': 'Associations: Listing', + 'Deal Stage ': "hubspot_status", } + # We sometimes columns if the landlord never provided them + missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None] + for c in missed_mapping_cols: + programme_data[c] = None + # We now create the finalised dataset to be uploaded into Hubspot variables_required = list(schema_mappings.values()) variables_required = [v for v in variables_required if v is not None] @@ -2235,6 +2753,27 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) + programme_data['Installer '] = installer_name + programme_data['Name '] = ( + programme_data['Full Address '] + " ," + programme_data['Postcode '] + ) + # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing + programme_data['Listing Owner Email '] = programme_data['Deal Owner'] + programme_data['Amount '] = 0 + programme_data["Deal Owner"] = np.where( + ~pd.isnull(programme_data["Deal Owner"]), + programme_data["Deal Owner"].astype(str).str.lower(), + programme_data["Deal Owner"] + ) + + # We make sure we have all of the columns that we need + 
missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] + if missed_columns: + raise ValueError( + f"We have the following columns that are not in the programme data: {missed_columns}. " + "Please check the mapping and ensure all required columns are present." + ) + self.hubspot_data = programme_data def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None): @@ -2324,13 +2863,63 @@ class AssetList: logger.info("Matched %s properties to ecosurv data", len(matched)) logger.info("%s properties in Ecosurv remain unmatched", len(unmatched)) - # We now match + if not matched: + return + + # We now match matched = pd.DataFrame(matched) # We'll possibly have duplicates here, where properties have been sold twice. Ww de-dupe if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum(): # It doesn't matter too much which record we take matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + # We merge on the status of the property + matched = matched.merge( + self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename( + columns={ + "Reference": "ecosurv_reference", + "Status": "ecosurv_status", + "Lead Status": "ecosurv_lead_status", + "Tags": "ecosurv_tags", + "Installer": "ecosurv_installer" + } + ), how="left", on="ecosurv_reference" + ) + + matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + + # This mapping is ordered by process order, where lodgment is the final step so if we have an indication + # that the property is ready for lodgement, we set the status to that. 
We then proceed through the other + # statuses where the penultimate status is install complete + mapping = { + "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED, + "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE, + "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + } + + def get_max_status(tag_str): + if pd.isna(tag_str): + return None + matched_statuses = [] + for tag, status in mapping.items(): + if tag in tag_str: + matched_statuses.append(status) + if not matched_statuses: + return None + return max(matched_statuses).label + + matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status) + self.standardised_asset_list = self.standardised_asset_list.merge( matched, how="left", @@ -2380,7 +2969,7 @@ class AssetList: # Perform the remap outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary) - outcomes["Outcome"] = outcomes["Outcome"].str.lower() + outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip() logger.info("Matching outcomes to asset list") # Merge the outcomes onto the asset list - we check we're able to match 
sufficiently well @@ -2507,7 +3096,7 @@ class AssetList: else: raise NotImplementedError("Invalid date in outcomes - implement me") - notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes" + notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes" lookup = lookup.merge( self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id" @@ -2542,12 +3131,13 @@ class AssetList: apply(get_latest_note). reset_index(drop=True) ) - latest_note = latest_note[["domna_property_id", notes_col]] + latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename( + columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"} + ) pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index() - pivot_df = pivot_df.merge( - visit_counts, how="left", on="domna_property_id" - ) + pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id") + pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id") # We want the latest note @@ -2558,15 +3148,32 @@ class AssetList: self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id") + # We flag the outcome status, based on the outcome + pivot_df["outcome_status"] = None + + if "surveyed" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + pivot_df["outcome_status"] + ) + + if "installer refusal" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, + pivot_df["outcome_status"] + ) + + pivot_df["outcome_status"] = np.where( + pivot_df["latest_outcome"].isin(["see notes"]) & + (pivot_df["outcome_status"] != 
hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label), + hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label, + pivot_df["outcome_status"] + ) + # We merge out pivoted outcomes onto the asset list self.standardised_asset_list = self.standardised_asset_list.merge( pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" ) - # Merge the latest note - self.standardised_asset_list = self.standardised_asset_list.merge( - latest_note.rename(columns={notes_col: "Latest Route March Note"}), - how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" - ) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): raise ValueError("Duplicates appreared - something went wrong") @@ -2576,6 +3183,7 @@ class AssetList: def flag_survey_master( self, master_filepaths, + master_id_colnames, master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion @@ -2591,7 +3199,7 @@ class AssetList: logger.info("Getting masters and merging onto asset list") master_surveyed = [] unmatched_submissions = [] - for filepath in master_filepaths: + for idx, filepath in enumerate(master_filepaths): master_data = pd.read_csv(filepath) # Strip columns master_data.columns = [c.strip() for c in master_data.columns] @@ -2618,22 +3226,6 @@ class AssetList: "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" ) - # if "UPRN" in master_data.columns: - # # We just need to check if any were cancelled - # master_to_append = master_data[ - # ["UPRN", install_col, submission_col] - # ].rename( - # columns={ - # "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, - # install_col: "survey_status", - # submission_col: "submission_date" - # } - # ) - # master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") - # - # master_surveyed.append(master_to_append) - # continue - master_data["row_id"] = master_data.index 
self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply( @@ -2643,21 +3235,33 @@ class AssetList: axis=1 ) - scheme_col = ( - "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if - "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH" - ) - postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" - house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO" - property_type_col = ( - "PROPERTY TYPE As per table emailed" if - "PROPERTY TYPE As per table emailed" in - master_data.columns else "PROPERTY TYPE As per table emailed" - ) - measure_mix_col = "MEASURE COMBO" + if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns: + scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" + elif "AFFORDABLE WARMTH" in master_data.columns: + scheme_col = "AFFORDABLE WARMTH" + else: + scheme_col = "OFFICE USE ONLY" + + postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" + if 'NO.' in master_data.columns: + house_no_col = 'NO.' + elif "NO" in master_data.columns: + house_no_col = 'NO' + else: + house_no_col = "NUMBER" + + if "PROPERTY TYPE As per table emailed" in master_data.columns: + property_type_col = "PROPERTY TYPE As per table emailed" + elif "PROPERTY TYPE As per table emailed" in master_data.columns: + property_type_col = "PROPERTY TYPE As per table emailed" + else: + property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 
3W_Flat_1 (As per Matrix)" + + measure_mix_col = "MEASURE COMBO" + installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" + installer_col = "INSTALLER" + town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' - # Otherwise, we need to match algorithmically - has_property_id = "UPRN" in master_data.columns logger.info("Matching master data to asset list") matched = [] unmatched = [] @@ -2670,13 +3274,22 @@ class AssetList: if pd.isnull(row[postcode_col]): continue - # if has_property_id: - # submission_uprn = row["UPRN"] - # - # if not pd.isnull(submission_uprn): - # df = self.standardised_asset_list[ - # self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn - # ] + if master_id_colnames[idx] is not None: + # Filter the standardised asset list on this + df = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] + ] + if df.shape[0] == 1: + matched.append( + { + "row_id": row["row_id"], + "original_house_no": original_house_no, + "original_street": original_street, + "original_postcode": original_postcode, + self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + } + ) + continue postcode_no_space = row[postcode_col].strip().replace(" ", "").lower() @@ -2721,6 +3334,7 @@ class AssetList: self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], } ) + continue if house_no in df["house_no"].values: df = df[df["house_no"] == house_no] @@ -2736,7 +3350,8 @@ class AssetList: df = df[ df[self.STANDARD_FULL_ADDRESS].str.lower().apply( lambda x: process.extractOne( - " ".join([row[house_no_col], row["Street / Block Name"], row["TOWN"]]).lower(), + " ".join( + [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(), x )[1] ) > 90 @@ -2781,18 +3396,31 @@ class AssetList: self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") # We match 
the "UPRN" which is the landlords ID, onto the master sheet + + if measure_mix_col not in master_data.columns: + master_data[measure_mix_col] = "Measure mix not recorded" + matched = pd.DataFrame(matched) - master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge( + master_to_append = master_data[ + [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] + ].merge( matched, how="left", on="row_id" ).rename( columns={ scheme_col: "funding_scheme", measure_mix_col: "measure_mix", install_col: "survey_status", - submission_col: "submission_date" + submission_col: "submission_date", + installer_notes_col: "submission_installer_notes", + installer_col: "submission_installer" } ) - master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") + master_to_append["submission_cancelled"] = ( + master_to_append["survey_status"].str.lower().str.contains("cancel") + ) + master_to_append["submission_installed"] = ( + master_to_append["survey_status"].str.lower().str.contains("installed") + ) master_surveyed.append(master_to_append) unmatched_df = master_data[ master_data["row_id"].isin(unmatched) @@ -2828,7 +3456,21 @@ class AssetList: ].astype(str) # We de-dupe crudely on landlord property id - self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy() + + # We now add the submission status, based on the hubspot stages + self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_cancelled"] == True, + hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label, + self.master_surveyed["submission_status"] + ) + + 
self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_installed"] == True, + hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label, + self.master_surveyed["submission_status"] + ) self.standardised_asset_list = self.standardised_asset_list.merge( self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID diff --git a/asset_list/app.py b/asset_list/app.py index bb898c09..7c0023ce 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -2,8 +2,6 @@ import os import json import pandas as pd from pprint import pprint -import msgpack -from utils.s3 import read_from_s3 from asset_list.AssetList import AssetList from asset_list.mappings.property_type import PROPERTY_MAPPING from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS @@ -62,98 +60,227 @@ def app(): Property UPRN """ - # Thurrock - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock" - data_filename = "THURROCK COUNCIL - For analysis.xlsx" - sheet_name = "Assets" + # NCHA + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" + data_filename = "Energy Information MASTER June 2025.xlsx" + sheet_name = "Data" postcode_column = 'Postcode' - fulladdress_column = "Full Address" + fulladdress_column = "Address" address1_column = None address1_method = "house_number_extraction" address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Construction Date" + landlord_year_built = "Build Date (HAR10)" landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Subtype" - landlord_wall_construction = None + landlord_property_type = "Property Type (HAR10)" + landlord_built_form = "Build Form (EPC)" + landlord_wall_construction = "Wall Description" landlord_roof_construction = None - landlord_heating_system = "Main Heating Type" + landlord_heating_system = "HEAT Code" landlord_existing_pv = None - landlord_property_id = "Property Reference" - 
landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] + landlord_property_id = "Place ref" + landlord_sap = "EPC SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None master_filepaths = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + master_id_colnames = [] - # Medway - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" - data_filename = "MEDWAY Asset List.xlsx" - sheet_name = "Asset list" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "House Number" - address1_method = None - address_cols_to_concat = ["House Number", "Street 1"] - missing_postcodes_method = None - landlord_year_built = "Year Built" - landlord_os_uprn = None - landlord_property_type = "Property Type - Academy" - landlord_built_form = "Property Type - Academy" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" + # data_filename = "07.04 CALICO - Final List.xlsx" + # asset_list_header = 2 + # sheet_name = "Final List" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "Property Number / Name" + # address1_method = None + # address_cols_to_concat = [ + # "Property Number / Name", + # "Street", + # "Town" + # ] + # missing_postcodes_method = None + 
# landlord_year_built = "NROSH Estimated Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Asset Type" + # landlord_built_form = None + # landlord_wall_construction = "Wall Type" + # landlord_heating_system = "Boiler Type" + # landlord_existing_pv = None + # landlord_property_id = "Asset Reference" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = "Current Efficiency Rating - Score" + # phase = None + # ecosurv_landlords = None - # MHS - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS" - data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - fulladdress_column = "FullAddress" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "BuiltInYear" - landlord_os_uprn = None - landlord_property_type = "AssetType" - landlord_built_form = "PropertyType" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None + # data_folder = ( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset + # List" + # ) + # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" + # sheet_name = "Assets" + # postcode_column = 'Postcode' + # 
fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Year" + # landlord_os_uprn = None + # landlord_property_type = "Property Archetype" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating Fuel Type" + # landlord_existing_pv = None + # landlord_property_id = "Uprn - DO NOT DELETE" + # outcomes_filename = [ + # os.path.join(data_folder, "RT - LiveWest.xlsx") + # ] + # outcomes_sheetname = ["Feedback"] + # outcomes_postcode = ["Poscode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["UPRN"] + # outcomes_address = ["Address"] + # master_filepaths = [ + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling + # Master " + # "- redacted for analysis/CAVITY-Table 1.csv" + # ] + # master_id_colnames = [None] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = None + # phase = None + # ecosurv_landlords = "livewest|live west" + + # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " + # "2025/Livewest Asset List (Original) - csv") + # data_filename = "Report-Table 1.csv" + # sheet_name = None + # postcode_column = 'Postcode' + # fulladdress_column = "T1_Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Yr" + # landlord_os_uprn = None + # landlord_property_type = "T1_AssetType" + # landlord_built_form = "T1_AssetType" + # landlord_wall_construction = "Wall Type Cavity" + # landlord_heating_system = "Heating Fuel" + # landlord_existing_pv = None + # landlord_property_id = "T1_UPRN" + # outcomes_filename = [ + # os.path.join(data_folder, "RT - 
LiveWest.xlsx") + # ] + # outcomes_address = ["Address"] + # outcomes_sheetname = ["Feedback"] + # outcomes_postcode = ["Poscode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["UPRN"] + # master_filepaths = [ + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling + # Master " + # "- redacted for analysis/CAVITY-Table 1.csv" + # ] + # master_id_colnames = [None] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = None + # phase = None + # ecosurv_landlords = "livewest|live west" + + # Stori + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" + # data_filename = "Asset list - for analysis.xlsx" + # sheet_name = "SAP and Costs Calculations" + # postcode_column = 'Postcode' + # fulladdress_column = "Address1" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Age" + # landlord_os_uprn = None + # landlord_property_type = "TYPE" + # landlord_built_form = "AGE / DETACHMENT" + # landlord_wall_construction = "WALL" + # landlord_roof_construction = "LOFT INSULATION" + # landlord_heating_system = "BOILER" + # landlord_existing_pv = "SOLAR PV" + # landlord_property_id = "UPRN" + # landlord_sap = "Current SAP Rating" + # landlord_block_reference = None + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # master_id_colnames = [] + # phase = False + # ecosurv_landlords = None + + # Thrive - reconciliation + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" + # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" + # sheet_name = "Sheet1" + # 
postcode_column = 'postcode' + # fulladdress_column = "full_address" + # address1_column = "address_line_1" + # address1_method = None + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "age_band_calculated" + # landlord_os_uprn = None + # landlord_property_type = "property_type" + # landlord_built_form = "build_form" + # landlord_wall_construction = None + # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" + # landlord_heating_system = "heating_type_updated" + # landlord_existing_pv = None + # landlord_property_id = "thrive_property_id" + # landlord_sap = "sap_rating_updated" + # landlord_block_reference = "block_reference" + # outcomes_filename = [ + # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") + # ] + # outcomes_sheetname = ["Sheet1"] + # outcomes_postcode = ["postcode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["thrive_property_id"] + # outcomes_address = ["address"] + # master_filepaths = [ + # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), + # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), + # ] + # master_to_asset_list_filepath = None + # master_id_colnames = ["thrive_property_id", "thrive_property_id"] + # phase = False + # ecosurv_landlords = "thrive" # Southern Midlands # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" @@ -182,40 +309,12 @@ def app(): # master_filepaths = [] # master_to_asset_list_filepath = None - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West" - data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx" - sheet_name = "CHECKED" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method 
= None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - landlord_sap = None - phase = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} asset_list = AssetList( local_filepath=os.path.join(data_folder, data_filename), - header=0, + header=asset_list_header, sheet_name=sheet_name, address1_colname=address1_column, postcode_colname=postcode_column, @@ -233,6 +332,7 @@ def app(): landlord_heating_system=landlord_heating_system, landlord_existing_pv=landlord_existing_pv, landlord_sap=landlord_sap, + landlord_block_reference=landlord_block_reference, phase=phase ) asset_list.init_standardise() @@ -294,7 +394,8 @@ def app(): asset_list.flag_survey_master( master_filepaths=master_filepaths, - master_to_asset_list_filepath=master_to_asset_list_filepath + master_to_asset_list_filepath=master_to_asset_list_filepath, + master_id_colnames=master_id_colnames, ) asset_list.flag_ecosurv(ecosurv_landlords) @@ -306,7 +407,7 @@ def app(): epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks - chunk_size = 5000 + chunk_size = 2000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): @@ -486,59 +587,12 @@ def app(): ) asset_list.merge_data(epc_df) - asset_list.extract_attributes() + asset_list.identify_worktypes() - cleaned = read_from_s3( - s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" - ) - cleaned = msgpack.unpackb(cleaned, raw=False) - - 
asset_list.identify_worktypes(cleaned) - - pprint(asset_list.work_type_figures) - - asset_list.flat_analysis() - - asset_list.load_contact_details( - local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), - sheet_name="Report 1", - landlord_property_id=asset_list.landlord_property_id, - phone_number_column='Property Current Tel. Number', - fullname_column='Proeprty Current Occupant', - firstname_column=None, - lastname_column=None, - email_column=None, # TODO - we need this - ) - - # Convert to a format suitable for CRM - # TODO: TEMP - assigned_surveyors = pd.DataFrame( - [ - { - asset_list.landlord_property_id: "02610001", - "week_commencing": "10/10/2025", - "surveyor_name": "Khalim Conn-Kowlessar", - "surveyor_email": "khalim@domna.homes", - } - ] - ) - - # TODO: Sort the output by postcode - - company_domain = "ealing.gov.uk" - crm_pipeline_name = "Survey Management" - first_dealstage = "READY TO BEGIN SCHEDULING" - # TODO - temp, upload to either SharePoint or AWS - - asset_list.prepare_for_crm( - assigned_surveyors=assigned_surveyors, - company_domain=company_domain, - crm_pipeline_name=crm_pipeline_name, - first_dealstage=first_dealstage - ) - hubspot_data = asset_list.hubspot_data + # We now flag the status of the property + asset_list.label_property_status() + asset_list.analyse_geographies() # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" @@ -546,7 +600,8 @@ def app(): with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) - asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False) + if asset_list.block_analysis_df is not None: + asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) # If we have outcomes, we add a tab with the outcomes if not asset_list.outcomes_for_output.empty: 
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) @@ -560,5 +615,5 @@ def app(): if not asset_list.ecosurv_no_match.empty: asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) - # Store the Hubspot export as a csv - hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False) + if not asset_list.geographical_areas.empty: + asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py new file mode 100644 index 00000000..5110fb5f --- /dev/null +++ b/asset_list/hubspot/config.py @@ -0,0 +1,85 @@ +from enum import IntEnum, Enum + +CRM_PIPELINE_NAME = 'Operations - Housing Associations' + + +class HubspotProcessStatus(IntEnum): + def __new__(cls, value, label): + obj = int.__new__(cls, value) + obj._value_ = value + obj.label = label + return obj + + # the numerical values of this enum aren't important, but they define the order of operations + + # This is the first stage, where a survey is ready to go + READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED" + # The property didn't get access and needs sign off + SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF" + # The survey has been completed. We don't have any update as to whether the property has been installed + SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF" + # The property turned out to be ineligibile + NOT_VIABLE = 4, "NOT VIABLE" + # The property is with the installer. 
This will likely be the default for historic programmes + SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER" + # The property has been installed + INSTALL_COMPLETE = 6, "INSTALL COMPLETE" + # The install has complete and lodgement is complete + LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE" + # The property has been cancelled + INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED" + + +class Installer(Enum): + SCIS = "SCIS" + JJ_CRUMP = "J & J CRUMP" + SGEC = "SGEC" + + @classmethod + def is_valid_value(cls, value): + """ + Check if the value is a valid installer. + """ + return value in cls._value2member_map_ + + +CRM_UPLOAD_COLUMNS = [ + 'Name ', 'Associations: Listing', 'Company Domain Name ', + 'Email ', 'First Name ', 'Last Name ', + 'Phone ', 'Secondary Phone ', + 'Secondary Contact Full Name ', + 'Listing Owner Email ', + 'Full Address ', 'Address 1 ', + 'Address 2 ', 'Postcode ', + 'Property Type ', 'Property Sub Type ', + 'Bedroom(s) ', 'Domna Property ID ', + 'National UPRN ', 'Owner Property ID ', + 'Wall Construction ', 'Heating System ', + 'Year Built ', 'Boiler Make ', + 'Boiler Model ', + 'Non-Intrusives: Date Checked ', + 'Non-Intrusives: Wall Type ', + 'Non-intrusives: Insulation ', + 'Non-intrusives: Insulation Material ', + 'Non-Intrusives: CIGA Check Required ', + 'Non-Intrusives: PV Access Issues ', + 'Non-Intrusives: Roof Orientation ', + 'Non-Intrusives: Surveyor Notes ', + 'Non-Intrusives: Surveyor Name ', + 'CIGA: Date Requested ', + 'CIGA: Cavity Guarantee Found ', + 'Last EPC: Is Estimated ', + 'Last EPC: EPC Rating ', + 'Last EPC: SAP Rating ', + 'Last EPC: Main Heating Description ', + 'Last EPC: Heating Controls ', + 'Last EPC: Lodgement Date ', + 'Last EPC: Floor Area ', 'Last EPC: Wall ', + 'Last EPC: Roof ', 'Last EPC: Floor ', + 'Last EPC: Room Height ', + 'Last EPC: Age Band ', 'Deal Stage ', + 'Pipeline ', 'Expected Commencement Date ', + 'Deal Name ', 'Project Code ', + 'Product ID ', 'Name ', 'Unit price ', + 
'Quantity ', 'Deal Owner', 'Amount ', 'Installer ' +] diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py new file mode 100644 index 00000000..eed6d7e7 --- /dev/null +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -0,0 +1,91 @@ +import os +import pandas as pd +from asset_list.AssetList import AssetList + + +def app(): + """ + TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after + review. So, we will need to update the hubspot status for these entries and set them to None, if they + were previously being set to ready for scheduling. We don't want to just filter on rows where + cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove + them + + + TODO: If we wish to upload deals in batches + + :return: + """ + + # inputs: + reconcile_programme = False # If True, the hubspot upload will include all properties with a project code + customer_domain = "https://sandwell.gov.uk" + installer_name = "J & J CRUMP" + asset_list_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - " + "Standardised.xlsx" + ) + asset_list_sheet_name = "Proposed Program" + asset_list_header = 1 + + contact_details_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx" + ) + contacts_sheet_name = "Sheet1" + contacts_landlord_property_id = "landlord_property_id" + contacts_phone_number_column = "phone_number" + contacts_secondary_phone_number_column = "secondary_phone_number" + contacts_secondary_contact_full_name = "secondary_contact_full_name" + contacts_email_column = "email" + contacts_fullname_column = "fullname" + contacts_firstname_column = "firstname" + contacts_lastname_column = "lastname" + + existing_programme_filepath = ( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv" + ) + + asset_list = AssetList.load_standardised_asset_list( + asset_list_filepath, asset_list_sheet_name, asset_list_header + ) + asset_list.load_contact_details( + local_filepath=contact_details_filepath, + sheet_name=contacts_sheet_name, + landlord_property_id=contacts_landlord_property_id, + phone_number_column=contacts_phone_number_column, + secondary_phone_number_column=contacts_secondary_phone_number_column, + secondary_contact_full_name=contacts_secondary_contact_full_name, + email_column=contacts_email_column, + fullname_column=contacts_fullname_column, + firstname_column=contacts_firstname_column, + lastname_column=contacts_lastname_column + ) + + asset_list.prepare_for_crm( + company_domain=customer_domain, + installer_name=installer_name, + reconcile_programme=reconcile_programme + ) + + # Remove the existing programme + existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") + asset_list.hubspot_data = asset_list.hubspot_data[ + ~asset_list.hubspot_data["Domna Property ID "].isin( + existing_programme['Domna Property ID'].values + ) + ] + + # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv + directory, filename = os.path.split(asset_list_filepath) + name, ext = os.path.splitext(filename) + output_filename = f"{name} - Hubspot Upload.csv" + output_filepath = os.path.join(directory, output_filename) + + if pd.isnull(asset_list.hubspot_data['Project Code ']).sum(): + raise ValueError("FIX MEEE") + + if pd.isnull(asset_list.hubspot_data['Deal Stage ']).any(): + raise ValueError("Warning: Some rows have missing project codes. 
These will not be uploaded to HubSpot.") + + # Just store locally + asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig") diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 116c3203..45e45c54 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -331,4 +331,33 @@ BUILT_FORM_MAPPINGS = { 'Low Rise': 'low rise', 'Upper Floor': 'top-floor', 'High Rise': 'high rise', + + '2012 ONWARDS DETACHED': 'detached', + '1950-66 END TERRACE': 'end-terrace', + '1976-82 MID TERRACED': 'mid-terrace', + '1950-66 MID TERRACE': 'mid-terrace', + '1991-95 DETACHED': 'detached', + '1976-82 END TERRACED': 'end-terrace', + '1967-75 DETACHED': 'detached', + 'PRE 1900 DETACHED': 'detached', + 'PRE 1900 MID TERRACE': 'mid-terrace', + '1900 DET': 'detached', + '1967-75 MID TERR': 'mid-terrace', + '1930-49 SEMI DET': 'semi-detached', + '1900-29 SEMI DET': 'semi-detached', + '1900-29 MID TERR': 'mid-terrace', + '1983- 90 MID TERR': 'mid-terrace', + '1976-82 MID TERR': 'mid-terrace', + '1983-90 END TERR': 'end-terrace', + '1991-95 SEMI DET': 'semi-detached', + '1983-90 SEMI DET': 'semi-detached', + '1991-95 MID TERR': 'mid-terrace', + '1950-66 SEMI DET': 'semi-detached', + '1900 MID TERR': 'mid-terrace', + '1967-75 SEMI DET': 'semi-detached', + '1983- 90 SEMI DET': 'semi-detached', + '1983-90 MID TERR': 'mid-terrace', + '1976-82 SEMI DET': 'semi-detached', + 'PRE 1900 MID TERR': 'mid-terrace' + } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index 51f5f922..e67fafb4 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = { 'PV: 25% roof area, PV: 3.6kWp array': 'already has PV', 'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 50% roof area': 'already has PV', - 'Solar PV': 'already has PV' + 'Solar PV': 'already has PV', + 'SOLAR PV': 'already has PV' } diff --git 
a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 92f59f2c..1a46c429 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -27,7 +27,7 @@ STANDARD_HEATING_SYSTEMS = { "electric ceiling", "electric underfloor", "no heating", - "non-electric underfloor" + "non-electric underfloor", } HEATING_MAPPINGS = { @@ -292,4 +292,39 @@ HEATING_MAPPINGS = { 'Communal Heating': 'communal heating', 'No Data': 'unknown', 'Boiler System': 'gas condensing boiler', + 'Storage heating': 'electric storage heaters', + 'Storage heating (HHRSH)': 'high heat retention storage heaters', + + 'ELECTRIC BOILER': 'electric boiler', + 'STORAGE HEATERS': 'electric storage heaters', + 'GREENSTAR 24I JUNIOR': 'gas combi boiler', + 'generic cond combi post98': 'gas condensing combi', + 'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler', + 'ECO TEC PRO 28 H COMBI A': 'gas combi boiler', + 'GREENSTAR 25I ErP': 'gas combi boiler', + 'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler', + 'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler', + 'IDEAL LOGIC HEAT 30': 'gas boiler, radiators', + 'WORCESTER 240': 'gas boiler, radiators', + 'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler', + 'ECO TEC PRO 28 (OLD)': 'gas combi boiler', + 'LOGIC COMBI2 C30': 'gas combi boiler', + 'GREENSTAR 28I JUNIOR': 'gas combi boiler', + 'WORCESTER 24i': 'gas combi boiler', + 'GREENSTAR 30I ErP': 'gas combi boiler', + '25 CDI': 'gas combi boiler', + 'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler', + 'GREENSTAR 24 RI': 'gas boiler, radiators', + 'BAXI COMBI 105 HE': 'gas combi boiler', + 'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler', + 'WORCESTER 28 SI ll RSF': 'gas combi boiler', + 'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler', + 'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler', + 'WORCESTER 24 SI ll RSF': 'gas combi boiler', + 'GREENSTAR 4000': 'gas combi boiler', + 'GREENSTAR 24i JUNIOR': 'gas combi 
boiler', + 'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler', + 'GREENSTAR 30SI COMPACT': 'gas combi boiler', + 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler', + 'Not applicable for this asset type': 'unknown' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index b705d6ef..bdb6580e 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -252,5 +252,19 @@ PROPERTY_MAPPING = { 'Bedsit bungalow semi detached': 'bedsit', 'Bedsit Flat': 'bedsit', 'Semi detached house': 'house', - 'Unit': 'unknown' + 'Unit': 'unknown', + 'HOUSE (3 STOREY)': 'house', + 'FLAT GROUND FLOOR': 'flat', + 'FLAT TOP FLOOR': 'flat', + + 'SHARED HOUSE': 'house', + 'MAISONETTE': 'maisonette', + 'DIRECT ACCESS HOSTEL': 'other', + 'Day centre': 'other', + 'Care home': 'other', + 'BLOCK (Communal)': 'block of flats', + 'SHOP': 'other', + 'Office Block': 'other', + 'BLOCK (Non-Communal)': 'block of flats', + 'Refuge': 'other' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 03d6f9af..13359ded 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = { "pitched unknown access to loft", "piched unknown insulation", "pitched insulated", - "pitched less than 100mm insulation" + "pitched less than 100mm insulation", "another dwelling above", "flat unknown insulation", "unknown insulated", @@ -38,4 +38,18 @@ ROOF_CONSTRUCTION_MAPPINGS = { '200mm': 'pitched insulated', '0-49mm': 'pitched less than 100mm insulation', '50mm': 'pitched less than 100mm insulation', + '': 'unknown', + 'NR': 'unknown', + 'Non-joist': 'unknown', + '25mm': 'pitched less than 100mm insulation', + '400mm+': 'pitched insulated', + '12mm': 'pitched less than 100mm insulation', + + '150MM': 'pitched insulated', + '200MM': 'pitched insulated', + '250MM': 'pitched insulated', + '100MM': 'pitched less than 100mm insulation', + 'U/K': 'unknown', + 'U/K - 250MM RIR FLAT 
CEILING': 'flat unknown insulation', + 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 5e32531f..2e0a332f 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -224,5 +224,31 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Traditional Cavity Brickwork': 'cavity unknown insulation', 'System build (undefined)': 'system built', 'Non Trad Wimpey': 'system built', - 'Non Trad Wates': 'system built' + 'Non Trad Wates': 'system built', + + 'CAVITY FILLED 270MM': 'filled cavity', + 'CAVITY FILLED 270MM': 'filled cavity', + 'CAVITY FILLED 250MM': 'filled cavity', + 'CAVITY FILLED 260MM': 'filled cavity', + 'CAVITY FILLED 260MM': 'filled cavity', + 'SOLID A/B 220MM': 'solid brick unknown insulation', + 'CAVITY A/B 300MM': "uninsulated cavity", + 'CAVITY A/B 250MM': "uninsulated cavity", + 'CAVITY A/B 260MM': "uninsulated cavity", + 'CAVITY A/B 270MM': "uninsulated cavity", + 'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation', + 'CAVITY EWI': 'filled cavity', + 'SANDSTONE/CAVITY EXT': 'sandstone or limestone', + 'SYSTEM BUILD 100MM EWI': 'system built', + 'CAVITY A/B 260MM': "uninsulated cavity", + 'CAVITY A/B 270MM': "uninsulated cavity", + 'CAVITY A/B 250MM': "uninsulated cavity", + 'System': 'system built', + 'Sandstone/Limestone': 'sandstone or limestone', + 'No Fines': 'system built', + 'Granite/Whinstone': 'granite or whinstone', + 'Not applicable to this asset type': 'unknown', + 'Steel Frame': 'system built', + 'Solid Wall As Built': 'uninsulated solid brick', + 'Solid As Built': 'uninsulated solid brick' } diff --git a/asset_list/utils.py b/asset_list/utils.py index ff9db3f8..1678b8e9 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -79,7 +79,13 @@ def get_data( uprn=uprn ) # Force the skipping of estimating the EPC - searcher.ordnance_survey_client.property_type = None + # We check if the property was split + if home.get("is_expended_block"): 
class Funding:
    """
    Estimates the ECO4 funding available for a package of retrofit measures.

    The class is configured with the tenure of the property, the ABS rates to apply for cavity/solid wall
    properties under each tenure, and the ECO4 full project scores matrix. Only the private rented route is
    implemented so far.
    """

    # (band name, inclusive lower SAP bound, exclusive upper SAP bound)
    _SAP_BANDS = (
        ("High_A", 96, float("inf")),
        ("Low_A", 92, 96),
        ("High_B", 86, 92),
        ("Low_B", 81, 86),
        ("High_C", 74.5, 81),
        ("Low_C", 69, 74.5),
        ("High_D", 61.5, 69),
        ("Low_D", 55, 61.5),
        ("High_E", 46.5, 55),
        ("Low_E", 39, 46.5),
        ("High_F", 29.5, 39),
        ("Low_F", 21, 29.5),
        ("High_G", 10.5, 21),
        ("Low_G", 1, 10.5),
    )

    def __init__(
            self,
            tenure: "HousingType",
            social_cavity_abs_rate: float,
            social_solid_abs_rate: float,
            private_cavity_abs_rate: float,
            private_solid_abs_rate: float,
            project_scores_matrix,
            whlg_eligible_postcodes
    ):
        """
        :param tenure: tenure of the property; compared against "Private" and "Social" in check_funding
        :param social_cavity_abs_rate: ABS rate applied to social housing with cavity walls
        :param social_solid_abs_rate: ABS rate applied to social housing with solid walls
        :param private_cavity_abs_rate: ABS rate applied to private housing with cavity walls
        :param private_solid_abs_rate: ABS rate applied to private housing with solid walls
        :param project_scores_matrix: dataframe with the columns "Floor Area Segment", "Starting Band",
            "Finishing Band" and "Cost Savings"
        :param whlg_eligible_postcodes: warm homes local grant postcode data (stored, not used yet)
        """
        self.tenure = tenure
        self.social_cavity_abs_rate = social_cavity_abs_rate
        self.social_solid_abs_rate = social_solid_abs_rate
        self.private_cavity_abs_rate = private_cavity_abs_rate
        self.private_solid_abs_rate = private_solid_abs_rate

        # These are populated by check_funding before the matrix lookup
        self.starting_sap_band = None
        self.ending_sap_band = None
        self.floor_area_band = None
        self.project_scores_matrix = project_scores_matrix
        self.whlg_eligible_postcodes = whlg_eligible_postcodes

    @staticmethod
    def get_sap_band(sap_score_number):
        """
        Maps a SAP score onto its half-band label, e.g. 54 -> "High_E"
        :param sap_score_number: numeric SAP score
        :return: the band label, or None if the score does not fall inside any band (e.g. below 1)
        """
        for band, lower, upper in Funding._SAP_BANDS:
            if lower <= sap_score_number < upper:
                return band

        return None

    @staticmethod
    def get_floor_area_band(floor_area):
        """
        Maps a floor area onto the floor area segment labels used by the project scores matrix
        :param floor_area: total floor area of the property
        :return: one of "0-72", "73-97", "98-199" or "200+"
        """
        if floor_area <= 72:
            return "0-72"

        if floor_area <= 97:
            return "73-97"

        if floor_area <= 199:
            return "98-199"

        # The top segment is labelled "200+", matching the other floor area classifiers in this repository
        # which are merged against the same matrix
        return "200+"

    @staticmethod
    def eco4_prs_eligibility(
            starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
    ):
        """
        Handles the eligibility criteria for private rental properties under eco
        :param starting_sap: SAP score of the property before works
        :param measures: list of measure names in the package
        :param mainheat_description: description of the main heating system
        :param heating_control_description: description of the heating controls
        :return: True if the package meets the help to heat group criteria below, otherwise False
        """

        # Help to heat group
        # 1) EPC E - G
        # 2) Must receive one of SWI, FTCH, renewable heating or DHC
        # 3) Tenant must be on benefits

        # We don't consider the tenant being on benefits - we just notify the end user that this is a requirement

        meets_epc = starting_sap <= 54
        has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures

        # Descriptions are matched case-insensitively, the search terms below are all lower case
        mainheat = (mainheat_description or "").lower()
        heating_controls = (heating_control_description or "").lower()

        # We check if the property has a heating system that means solar pv counts as a renewable heating system
        has_eligible_electric_heating = any(
            x in mainheat for x in
            ["air source heat pump", "ground source heat pump", "boiler and radiators, electric"]
        ) or (
            "electric storage heaters" in mainheat and
            heating_controls == "controls for high heat retention storage heaters"
        )

        # Counts as renewable heating
        solar_renewable_heating = has_eligible_electric_heating and ("solar_pv" in measures)
        # Is a renewable heating
        ashp = "air_source_heat_pump" in measures

        return bool(meets_epc and (solar_renewable_heating or ashp or has_solid_wall))

    def calculate_full_project_abs(self):
        """
        Looks up the full project score for the current floor area band and starting/ending SAP bands
        :return: the "Cost Savings" value of the first matching row of the project scores matrix
        :raises ValueError: if the matrix has no row for the combination of bands
        """

        # Filter the project scores matrix
        data = self.project_scores_matrix[
            (self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) &
            (self.project_scores_matrix["Starting Band"] == self.starting_sap_band) &
            (self.project_scores_matrix["Finishing Band"] == self.ending_sap_band)
        ]

        if data.empty:
            raise ValueError("Missing abs rate, check the project scores matrix")

        return data["Cost Savings"].values[0]

    def check_funding(
            self, measures: List,
            starting_sap: int,
            ending_sap: int,
            floor_area: float,
            mainheat_description: str,
            heating_control_description: str,
            is_cavity: bool
    ):
        """
        Given a list of measures, this function will check if the package of measures is fundable
        :param measures: list of measure names in the package
        :param starting_sap: SAP score before works
        :param ending_sap: SAP score after works
        :param floor_area: total floor area of the property
        :param mainheat_description: description of the main heating system
        :param heating_control_description: description of the heating controls
        :param is_cavity: Indicates if the property has cavity wall insulation
        :return: the estimated ECO4 funding (full project score multiplied by the relevant ABS rate), or None
            if the package is not eligible for ECO4
        :raises NotImplementedError: for SAP movements or tenures that are not handled yet
        :raises ValueError: if the project scores matrix has no entry for the property
        """

        # A property starting at D or above needs to get to an EPC C
        if starting_sap >= 55 and ending_sap < 69:
            raise NotImplementedError("This property doesn't have sufficient SAP movement")

        # `and` rather than `&`: the bitwise operator binds tighter than the comparisons
        if starting_sap <= 38 and ending_sap <= 55:
            # F or G should get to D
            raise NotImplementedError("Implement F or G to D eligibility")

        self.starting_sap_band = self.get_sap_band(starting_sap)
        self.ending_sap_band = self.get_sap_band(ending_sap)
        self.floor_area_band = self.get_floor_area_band(floor_area)

        ########################
        # Private
        ########################
        # 1) ECO4
        # 2) GBIS

        if self.tenure == "Private":
            is_eco4_eligible = self.eco4_prs_eligibility(
                starting_sap=starting_sap,
                measures=measures,
                mainheat_description=mainheat_description,
                heating_control_description=heating_control_description
            )

            # TODO - GBIS still needs to be implemented:
            # 1) Package has to include an insulation measure
            # 2) We should use the funding for the measure that has the largest partial project score

            if not is_eco4_eligible:
                return None

            eco4_abs = self.calculate_full_project_abs()
            # We estimate rates now
            abs_rate = self.private_cavity_abs_rate if is_cavity else self.private_solid_abs_rate
            return eco4_abs * abs_rate

        ########################
        # Social
        ########################
        # 1) ECO4
        # 2) GBIS

        if self.tenure == "Social":
            raise NotImplementedError("Funding for Social housing is not implemented yet")

        raise NotImplementedError("Only implemented for Private or Social housing")
+ :return: + """ + self.strict_property_type_search = True + @staticmethod def get_house_number(address: str, postcode=None) -> str | None: """ @@ -315,6 +327,8 @@ class SearchEpc: address_params["address"] = self.address1 if self.postcode: address_params["postcode"] = self.postcode + if self.strict_property_type_search and self.property_type: + address_params["property-type"] = self.property_type.lower() # We attempt the search with uprn params @@ -365,11 +379,16 @@ class SearchEpc: unique_property_types = {r["property-type"] for r in rows} + is_just_a_house = (len(unique_property_types) == 1) & ( + ("House" in unique_property_types) | ("Bungalow" in unique_property_types) + ) + # We allow for variation in property type across flats/maisonettes # If we know that we have a flat/maisonette, we allow for both property types - if property_type in ["Flat", "Maisonette"]: - if ((len(uprns) == 1) and ((len(unique_property_types) == 1) - ) or unique_property_types == {"Flat", "Maisonette"}): + # Make sure we have not JUST a house, or not JUST a flat/maisonette + if property_type in ["Flat", "Maisonette"] and not is_just_a_house: + if (((len(uprns) == 1) and ((len(unique_property_types) == 1) + ) or unique_property_types == {"Flat", "Maisonette"})): return rows if property_type is not None: @@ -424,6 +443,8 @@ class SearchEpc: return rows + raise ValueError("property type and address cannot both be None, at least one must be provided") + @staticmethod def format_address(newest_epc): """ @@ -702,6 +723,18 @@ class SearchEpc: exclude_old=exclude_old ) + # Check if it's a new build EPC. 
A property that doesn't have an EPC is not going to be a new build + # so we avoid comparing it to new builds + # TODO - this is experimental + newer_age_bands = [ + "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011", + "England and Wales: 2012 onwards" + ] + + if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum(): + # We have some older age bands, so we need to filter them out + epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy() + # If we have missing lodgment date, we fill it with inspection-date epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"]) # If we still have missing dates, we set it to the mean of the non NA dates diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 58c3dc8e..5316fd03 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest): ) # if we have a remote assment data type, we pull the additional data and include it - if body.event_type == "remote_assessment": + if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]): logger.info("Retrieving find my epc data") try: property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py new file mode 100644 index 00000000..311ab589 --- /dev/null +++ b/backend/tests/test_funding.py @@ -0,0 +1,52 @@ +import pytest +import pandas as pd +from utils.s3 import read_csv_from_s3 +from backend.Funding import Funding + + +def get_funding_data(): + """ + This function retrieves the eco project scores matrix and the warm homes local grant funding data + :return: + """ + project_scores_matrix = read_csv_from_s3( + bucket_name="retrofit-data-dev", + filepath="funding/ECO4 Full Project Scores Matrix.csv", + ) + project_scores_matrix = 
def get_band(sap_score_number):
    """
    Translate a SAP score into its half-band label (e.g. 54 -> "High_E").

    Each band covers the half-open interval [lower, upper). Scores that fall outside every interval
    (for example anything below 1) give None.
    """
    band_limits = (
        ("High_A", 96, float("inf")),
        ("Low_A", 92, 96),
        ("High_B", 86, 92),
        ("Low_B", 81, 86),
        ("High_C", 74.5, 81),
        ("Low_C", 69, 74.5),
        ("High_D", 61.5, 69),
        ("Low_D", 55, 61.5),
        ("High_E", 46.5, 55),
        ("Low_E", 39, 46.5),
        ("High_F", 29.5, 39),
        ("Low_F", 21, 29.5),
        ("High_G", 10.5, 21),
        ("Low_G", 1, 10.5),
    )

    # First (and only) interval that contains the score, None when there is no such interval
    return next(
        (label for label, floor, ceiling in band_limits if floor <= sap_score_number < ceiling),
        None,
    )
def classify_floor_area(floor_area):
    """
    Maps a floor area onto the floor area segment labels used for the ECO4 project scores
    :param floor_area: total floor area of the property
    :return: one of "0-72", "73-97", "98-199" or "200+"
    """
    if floor_area <= 72:
        return "0-72"

    if floor_area <= 97:
        return "73-97"

    if floor_area <= 199:
        return "98-199"

    # NOTE: a missing (NaN) floor area fails every comparison above, so it also ends up here
    return "200+"


asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
    sheet_name="Standardised Asset List"
)

asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area)

# Objective:
# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
#
# Therefore:
# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
#    to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
#    qualify
# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
#    archetypes
#
# Driving Factors:
# 1) Floor area band & starting SAP band - this will determine how much funding is produced
# 2) Heating system - this will determine if the property needs a heating upgrade or not


archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby(
    ["floor_area_band", "starting_sap_band", "landlord_heating_system"]
)["landlord_property_id"].nunique().reset_index()
archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"})
archetypes = archetypes.sort_values("n_properties", ascending=False)
archetypes["running_total"] = archetypes["n_properties"].cumsum()
archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100

archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel"
archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(
    ["boiler - other fuel", "electric storage heaters"]
)
archetypes = archetypes.reset_index(drop=True)
# The id has to exist before the electric/oil subsets are copied below, so that every subset carries the
# same archetype_id as the asset list. The id is the rank of the archetype by number of properties
archetypes["archetype_id"] = archetypes.index

# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
electric_heated_archetypes = (
    archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True)
)
electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
electric_heated_archetypes["cumulative_percentage"] = (
    electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
)

# The main properties that need validation surveys are properties that require a heating upgrade
electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]]

oil_archetypes = archetypes[
    archetypes["landlord_heating_system"] == "boiler - other fuel"
].copy().reset_index(drop=True)

asset_list = asset_list.merge(
    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
)

properties_for_verification = asset_list[
    asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
].copy()
properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[
    0].str.strip()

properties_for_verification["epc_age"] = (
    pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"])
).dt.days

# We also survey 2 oil heater properties, so we take the 2 most prevalent archetypes
archetypes_for_survey = pd.concat(
    [electric_heated_archetypes, oil_archetypes.head(2)]
)

# Take the property with the oldest EPC, by region. Prioritise estimated properties
sample = []
for _, config in archetypes_for_survey.iterrows():
    properties = asset_list[
        (asset_list["archetype_id"] == config["archetype_id"]) &
        (asset_list["floor_area_band"] == config["floor_area_band"]) &
        (asset_list["starting_sap_band"] == config["starting_sap_band"])
    ]

    if pd.isnull(properties["epc_inspection_date"]).sum():
        # No inspection date means the EPC was estimated, these take priority
        sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records")
    else:
        # Take the property with the oldest EPC
        sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records")

    sample.extend(sample_property)

sample = pd.DataFrame(sample)

sample = sample[
    [
        "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band",
        "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id"
    ]
]

archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy()
archetypes["archetype_id"] = archetypes["archetype_id"].astype(str)

filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"
# Store the data in two tabs. One with the archetype of every property and the second with the survey sample

with pd.ExcelWriter(filename) as writer:
    archetypes.to_excel(writer, sheet_name="Archetypes", index=False)
    sample.to_excel(writer, sheet_name="Survey Sample", index=False)

# We store this

# Questions:
# 1) If futures are considering changing properties that have oil heating systems, we could include them and
#    we have 39 total archetypes.
Otherwise, we have 25 archetypes +# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're +# using + +# Recommendations: +# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover +# diff --git a/etl/customers/cambridge/surveys.py b/etl/customers/cambridge/surveys.py new file mode 100644 index 00000000..2aa52d6f --- /dev/null +++ b/etl/customers/cambridge/surveys.py @@ -0,0 +1,24 @@ +import pandas as pd +from backend.ml_models.Valuation import PropertyValuation +from backend.app.utils import sap_to_epc + +# Read in the survey data +surveys = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx", + sheet_name="Survey data", +) + +increases = [] +for _, x in surveys.iterrows(): + current_epc = sap_to_epc(x["Pre SAP"]) + target_epc = sap_to_epc(x["Scenario 1 Post SAP"]) + current_value = x["Valuation"] + + val = PropertyValuation.estimate_valuation_improvement( + current_value, + current_epc, + target_epc, + total_cost=None + ) + avg_increase = val["average_increase"] + increases.append(round(avg_increase)) diff --git a/etl/customers/l_and_g/risk_matrix.py b/etl/customers/l_and_g/risk_matrix.py index c800117e..8f5451fc 100644 --- a/etl/customers/l_and_g/risk_matrix.py +++ b/etl/customers/l_and_g/risk_matrix.py @@ -81,6 +81,7 @@ def app(): # We need to calculate the costs cost_data = [] for _, row in epr_data.iterrows(): + epc = row["EPC"][0] sap = int(row["EPC"][1:]) diff --git a/etl/customers/places_for_people/abs.py b/etl/customers/places_for_people/abs.py new file mode 100644 index 00000000..aa85a93f --- /dev/null +++ b/etl/customers/places_for_people/abs.py @@ -0,0 +1,199 @@ +""" +This script is to calculate the ABS for the Places for People London project +""" + +import os +import pandas as pd + +# London +pfp_london_cav = pd.read_excel( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_london_cav = pfp_london_cav.rename(columns={"Route": "Route March"}) +pfp_london_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_london_pv = pfp_london_pv.rename(columns={"Route": "Route March"}) +pfp_london_cav["location"] = "London" +pfp_london_pv["location"] = "London" +# East +pfp_east_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_east_reviewed_standarised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_east_cav = pfp_east_cav.rename(columns={"Route": "Route March"}) +pfp_east_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_east_reviewed_standarised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_east_pv = pfp_east_pv.rename(columns={"Route": "Route March"}) +pfp_east_cav["location"] = "East" +pfp_east_pv["location"] = "East" +# North east +pfp_north_east_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_east_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_north_east_cav = pfp_north_east_cav.rename(columns={"Route": "Route March"}) +pfp_north_east_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_east_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_north_east_pv = pfp_north_east_pv.rename(columns={"Route": "Route March"}) +pfp_north_east_cav["location"] = "North East" +pfp_north_east_pv["location"] = "North East" +# North West 
def classify_floor_area(floor_area):
    """
    Return the floor area segment label ("0-72", "73-97", "98-199" or "200+") for a floor area.

    The upper limit of each segment is inclusive. Anything that is not at or below the largest limit is
    placed in the open-ended "200+" segment.
    """
    segment_limits = ((72, "0-72"), (97, "73-97"), (199, "98-199"))

    for upper_limit, segment in segment_limits:
        if floor_area <= upper_limit:
            return segment

    return "200+"
solar_route["epc_total_floor_area"].apply(classify_floor_area) + +# We classify the abs bounds +cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68) +cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band) +cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area) +cav_route["ending_abs_band"] = "Low_C" + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) + +cav_route = cav_route.merge( + abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}), + how="left", + left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"], + right_on=["Starting Band", "Finishing Band", "Floor Area Segment"], +) +solar_route = solar_route.merge( + abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}), + how="left", + left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"], + right_on=["Starting Band", "Finishing Band", "Floor Area Segment"], +) +cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0) +solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0) + +cav_abs_agg = ( + cav_route.groupby("Route March").agg( + { + "ABS Rate": "sum", + "landlord_property_id": "count", + } + ).reset_index() +) +cav_abs_agg["Week Number"] = cav_abs_agg["Route March"].str.extract(r"(\d+)").astype(int) +cav_abs_agg = cav_abs_agg.sort_values("Week Number", ascending=True) +cav_abs_agg = cav_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"}) + +solar_abs_agg = ( + solar_route.groupby("Route March").agg( + { + "ABS Rate": "sum", + "landlord_property_id": "count", + } + ).reset_index() +) +solar_abs_agg["Week Number"] = solar_abs_agg["Route March"].str.extract(r"(\d+)").astype(int) +solar_abs_agg = solar_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"}) +solar_abs_agg = solar_abs_agg.sort_values("Week Number", ascending=True) + +# We store the data +# Store as an 
excel +filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx" +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + +with pd.ExcelWriter(filename) as writer: + solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False) + cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False) + + cav_route.to_excel(writer, sheet_name="Cavity data", index=False) + solar_route.to_excel(writer, sheet_name="Solar data", index=False) diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py index 521cfd30..2d6a0d69 100644 --- a/etl/customers/thrive/Programme Analysis.py +++ b/etl/customers/thrive/Programme Analysis.py @@ -8,6 +8,8 @@ address the following concerns: """ import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc # This is Thrive's list of properties and when they should have been surveyed thrive_tracker = pd.read_excel( @@ -51,27 +53,10 @@ original_columns = { } original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns) -original_asset_list["Data Source"] = "Thrive Tracker" +original_asset_list["Data Source"] = "Original Asset List" +original_asset_list = original_asset_list.drop_duplicates() # We append on the missed properties, with the information we have -# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#', -# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form', -# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number', -# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number', -# 'Special Requirements ', 'CIGA', 'Date CIGA check received', -# 'Proposed Progamme', 'New Proposed Programme', -# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type', -# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation', -# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition', -# 
'Date Submitted to installer', 'PRRN Number', -# 'Loft insulation required? (Thrive)', 'Date booked ', -# 'Completed\n(yes/no)', 'Date Completed', -# 'Vents installed?\n(number and location)', -# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ', -# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added', -# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated', -# 'PRRN Submitted ' - missed_properties["Full Address"] = ( missed_properties["#"].astype(str) + ", " + missed_properties["Adress Line 1"].astype(str) + ", " + @@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected" missed_properties["ECO Eligibility"] = "Property Not Inspected" missed_properties["Data Source"] = "Thrive Tracker" +# We de-dupe ides in original_asset_list +dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique() +dupes = original_asset_list[ + original_asset_list["thrive_property_id"].isin(dupe_ids) +].copy() +dupes = dupes.sort_values("thrive_property_id") + +original_asset_list = original_asset_list.rename( + columns={ + "detailed_property_type": "build_form" + } +) + master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True) # We were provided with a data update for a sample of properties. 
We update the data with this information @@ -103,12 +101,339 @@ data_update = pd.read_excel( header=0 ) -new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)] +new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy() +new_properties["Full Address"] = ( + new_properties["#"].astype(str) + ", " + + new_properties["Adress Line 1"].astype(str) + ", " + + new_properties["Postcode"].astype(str) +) +new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns) +new_properties["WFT Findings"] = "Property Not Inspected" +new_properties["ECO Eligibility"] = "Property Not Inspected" +new_properties["Data Source"] = "13.05.2025 Data Update" + +master_list = pd.concat([new_properties, master_list]) + +# We append any new data on heating system, heating type, and insulation type, based on the data update +master_list = master_list.merge( + data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename( + columns={ + "Heating Type": "heating_type_updated", + "Assumed mm ": "assumed_loft_insulation_thickness_updated", + "SAP": "sap_rating_updated" + } + ), + how="left", + left_on="thrive_property_id", + right_on="UPRN" +) + +# We fill the missings +master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"]) +master_list["assumed_loft_insulation_thickness_updated"] = master_list[ + "assumed_loft_insulation_thickness_updated" +].fillna(master_list["assumed_loft_insulation_thickness"]) +master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"]) + +assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list" + +master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin( + thrive_tracker["UPRN"].astype(str).values +) + +# Those the asset list - call it master asset 
list updated May2025 +master_list = master_list.drop(columns=["UPRN"]) +master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str) +# master_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " +# "Complete - Updated May 2025.xlsx", +# ) + +master_list["house_number_TEMP"] = master_list.apply( + lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]), + axis=1 +) + +# We add in the status of the property +# TODO: Add the status of the property from the Thrive tracker +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April " + "24-March25.xlsx", + header=0 +) +outcomes["row_id"] = outcomes.index + +# We have two ids which have the same phohe. nymber, but different UPRN, so we don't match to the tracker for these +tracker_for_matching = thrive_tracker[ + ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1']) +].copy() +tracker_for_matching["Full Address"] = ( + tracker_for_matching["#"].astype(str) + ", " + + tracker_for_matching["Adress Line 1"].astype(str) + ", " + + tracker_for_matching["Postcode"].astype(str) +) + +outcomes_id_lookup = [] +for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)): + + hn = str(x["No."]) + address = x["Address"] + postcode = x["Postcode"] + contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"]) + contact_no = None if contact_no == "nan" else contact_no + + if address == "292 Micklefield Road": + hn = "292" + + if (address == "Micklefield Road") & (hn == "302"): + hn = "292" + + if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "103a" + + if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "105a" + + if (address == "107a Norfolk Road Rickmansworth Hertfordshire 
WD3 1JY"): + hn = "107a" + + # + # # We match this to the tracker + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no] + # # Many of the phone numbers don't have a leading zero in the tracker so we add them + # if (m1.shape[0] != 1) and not pd.isnull(contact_no): + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")] + # + # if m1.shape[0] > 1: + # raise ValueError( + # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + # ) + + # if m1.empty: + m1 = tracker_for_matching[ + (tracker_for_matching["#"].astype(str) == hn) & + (tracker_for_matching["Postcode"] == postcode) + ] + + if m1.empty: + # Some properties aren't in the tracker, we match to the master list + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == postcode) + ] + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "address": m1["full_address"].values[0], + "postcode": m1["postcode"].values[0], + } + ) + continue + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + ) + + # We add the status to the master list + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["UPRN"].values[0], + "address": m1["Full Address"].values[0], + "postcode": m1["Postcode"].values[0], + } + ) + +outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup) +outcomes = outcomes.merge( + outcomes_id_lookup, + how="left", + left_on="row_id", + right_on="row_id" +) + +outcomes = outcomes.drop(columns=["row_id"]) +outcomes = outcomes.rename( + columns={ + "Outcomes": "Outcome", + "Notes (If 'no " + "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes", + } +) +# Store the corrected outcomes +# outcomes.to_excel( +# 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - +# April 24-March25 - Corrected.xlsx", +# index=False +# ) -data_update = = data_update[["UPRN", ""]] -# TODO: Flag the Thrive priorities and create a separate project code for these -# TODO: Add the general project code -# TODO: Add the thrive \ No newline at end of file +def parse_date(value): + # Strip any 'W.C' or 'w/c' prefix and clean whitespace + value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip() + try: + # Try parsing the date with dayfirst=True + return pd.to_datetime(value, dayfirst=True, errors='coerce') + except Exception: + return pd.NaT + + +outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date) + +# Next step - match the submissions master to the asset list. We will append on the UPRN +eco3_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3.csv", + header=0 +) +eco3_submissions["row_id"] = eco3_submissions.index + +eco4_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4.csv", + header=0 +) +eco4_submissions["row_id"] = eco4_submissions.index + +# List of properties never on the asset list +not_on_master = [ + "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL", + "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA", + "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN", + "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN", + "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN", + "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN", + 
"25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN", + "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN", + '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN', + '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN', + '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN', + '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN' +] + +eco3_remap = { + "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'), + "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'), + "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'), + "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'), + "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'), + "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'), + "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'), + "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'), + "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'), + "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'), + '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'), + '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'), + '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'), + '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'), + '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'), + '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'), + '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill 
gardens', 'WD5 0HF'), + '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'), + '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'), + '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'), + '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'), + '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'), + '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'), +} + +eco3_lookup = [] +for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)): + hn = row["NO "] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == pc) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + eco3_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO "], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +eco4_lookup = [] +for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)): + hn = row["NO."] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"].str.lower() == pc.lower()) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + 
eco4_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO."], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +# We match the lookups back to the submission sheets +eco3_lookup = pd.DataFrame(eco3_lookup) +eco3_submissions = eco3_submissions.merge( + eco3_lookup, + how="left", + on="row_id", +) + +eco4_lookup = pd.DataFrame(eco4_lookup) +eco4_submissions = eco4_submissions.merge( + eco4_lookup, + how="left", + on="row_id", +) + +# Store +eco3_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3 - with IDS.csv", + index=False +) +eco4_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4 - with IDS.csv", + index=False +) diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py new file mode 100644 index 00000000..01a15497 --- /dev/null +++ b/etl/customers/thrive/Project codes.py @@ -0,0 +1,130 @@ +""" +THis script will take the standardised asset list and append on the project codes. 
+We also, review the existing install status, in case anything is wrong +""" +import pandas as pd +import numpy as np + +standardised_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +project_code_allocations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +programme_codes = project_code_allocations[ + ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ] +].copy() +programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy() +programme_codes["programme_reference"] = np.where( + pd.isnull(programme_codes["programme_reference"]), + programme_codes["Proposed Progamme"], + programme_codes["programme_reference"] +) + +PROJECT_CODE_MAP = { + 'Phase 2': "THRIVE-002", + 'Phase 3': "THRIVE-003", + 'Phase 4': "THRIVE-004", + 'Phase 5': "THRIVE-005", + 'Phase 6': "THRIVE-006", + 'Phase 7': "THRIVE-007", + 'Phase 8': "THRIVE-008", + 'Phase 9': "THRIVE-009", + 'Phase 10': "THRIVE-010", + "Week 1": "THRIVE-WEEK-001", + "Week 2": "THRIVE-WEEK-002", + "Week 4": "THRIVE-WEEK-004", + "Week 7": "THRIVE-WEEK-007", +} +programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP) + +thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy() + +standardised_asset_list = standardised_asset_list.merge( + programme_codes[["UPRN", "project_code", "programme_reference"]], + how="left", + left_on="landlord_property_id", + right_on="UPRN", +).merge( + thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]], + how="left", + on="UPRN", +) + +standardised_asset_list = 
standardised_asset_list.drop(columns=["UPRN"]) + +# We fill the project code for historical completions +standardised_asset_list["project_code"] = np.where( + pd.isnull(standardised_asset_list["project_code"]) & ( + standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED" + ) & ( + ~pd.isnull(standardised_asset_list["hubspot_status"]) + ), + "THRIVE-HISTORICAL", + standardised_asset_list["project_code"] +) + +# Store as an excel +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - " + "reconciled.xlsx") +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data +# Other tabs: +block_analysis = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Block Analysis", +) +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Outcomes", +) +unmatched_submissions = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Submissions", +) +unmatched_ecosurv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Ecosurv", +) + +with pd.ExcelWriter(filename) as writer: + standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False) + # If we have outcomes, we add a tab with the outcomes + outcomes.to_excel(writer, sheet_name="Outcomes", index=False) + 
unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) + unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) + +# A check, just comparing against the master tracker to make sure I have all of the installs +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +master_tracker = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +df = asset_list[["landlord_property_id", "hubspot_status"]].merge( + master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]], + how="inner", + left_on="landlord_property_id", + right_on="UPRN" +) + +df["hubspot_status"].value_counts() +df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"] diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py index dbb621e7..5fb914a8 100644 --- a/recommendations/tests/test_lighting_recommendations.py +++ b/recommendations/tests/test_lighting_recommendations.py @@ -49,6 +49,6 @@ class TestLightingRecommendations: 'lighting in all ' 'fixed outlets', 'low-energy-lighting': 100}, - 'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, - 'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, - 'labour_cost': 63.0, 'survey': False}] + 'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3, + 'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False} + ]