diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 4b7a11ec..21376708 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -4,8 +4,8 @@ import re import tiktoken from pprint import pprint from datetime import datetime +import asset_list.hubspot.config as hubspot_config -from numpy.ma.core import masked_not_equal from openai import OpenAI import numpy as np import pandas as pd @@ -29,6 +29,7 @@ from recommendations.recommendation_utils import ( ) from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes +from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes logger = setup_logger() @@ -279,6 +280,7 @@ class AssetList: STANDARD_HEATING_SYSTEM = "landlord_heating_system" STANDARD_EXISTING_PV = "landlord_existing_pv" STANDARD_SAP = "landlord_sap_rating" + STANDARD_BLOCK_REFERENCE = "landlord_block_reference" DOMNA_PROPERTY_ID = "domna_property_id" @@ -292,6 +294,21 @@ class AssetList: "Any further surveyor notes", 'Surveyors Name' ] + NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [ + "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", + "Does the property have cladding?", "Gable Wall Obstructions", + "Does the property have foliage that needs removal?", + "Potential unsafe environment", "Date of Inspection", "Borescoped?" 
+ ] + + # Another version of non-intrusives: + NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [ + 'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?', + 'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED', + 'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE', + 'NAME OF SURVEYOR' + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -342,6 +359,40 @@ class AssetList: "cavity wall, as built, partial insulation", ] + # Work type prefixes: + # Empties + EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" + EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002' + EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" + EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" + EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" + EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build" + EPC_EMPTY = "EPC Shows Empty Cavity" + LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " + "Non-cavity") + # Extraction + EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction" + + # Solar + SOLAR_ELIGIBLE = "Solar Eligible" + SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade" + + CRM_HISTORICAL_CAVITY_PRODUCT = { + "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity" + } + + CRM_PRODUCTS = { + "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"}, + "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"}, + "Solar PV - ECO4": {"id": 
82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"}, + "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"}, + "Solar PV + Heating Upgrade - ECO4": { + "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4" + }, + "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT + } + def __init__( self, local_filepath, @@ -362,6 +413,7 @@ class AssetList: landlord_heating_system=None, landlord_existing_pv=None, landlord_sap=None, + landlord_block_reference=None, phase=False, header=0 ): @@ -375,7 +427,7 @@ class AssetList: self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} - self.flat_data = None + self.block_analysis_df = None self.duplicated_addresses = None self.contact_details = None self.contact_detail_fields = None @@ -386,6 +438,7 @@ class AssetList: self.unmatched_submissions = pd.DataFrame() self.ecosurv = None self.ecosurv_no_match = pd.DataFrame() + self.geographical_areas = pd.DataFrame() # When this is True, we intend to break the programme into multiple phases. We may need to review # how this is structured in the future, as depending on how we get future data, we may need to @@ -397,9 +450,17 @@ class AssetList: self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns # We detect if we have the old format of non-intruvies self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns + if self.old_format_non_intrusives_present: + self.non_intrusives_present = False self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + self.new_format_non_insturives_present = ( + "Has the property been re-walled?" 
in self.raw_asset_list.columns + ) + + self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -414,6 +475,7 @@ class AssetList: self.landlord_heating_system = landlord_heating_system self.landlord_existing_pv = landlord_existing_pv self.landlord_sap = landlord_sap + self.landlord_block_reference = landlord_block_reference # parameters for cleaning self.full_address_cols_to_concat = full_address_cols_to_concat @@ -479,6 +541,23 @@ class AssetList: self.standardised_asset_list["Archetype"].copy() ) + self.prefixes_to_products = { + # Empty + self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + # Extraction + self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], + # Solar + self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], + } + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -660,6 +739,7 @@ class AssetList: self.landlord_heating_system, self.landlord_existing_pv, self.landlord_sap, + self.landlord_block_reference, ] # Keep just non-null variables (e.g landlord may not provide uprn self.keep_variables = [v for v in 
variables if v is not None] @@ -677,16 +757,23 @@ class AssetList: self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, self.landlord_existing_pv: self.STANDARD_EXISTING_PV, self.landlord_sap: self.STANDARD_SAP, + self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} non_intrusive_columns = [] - if self.non_intrusives_present: + if self.non_intrusives_present and not self.new_format_non_insturives_present_v2: non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES if self.non_intrusives_eligibility: non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) + if self.new_format_non_insturives_present: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES + + if self.new_format_non_insturives_present_v2: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -755,52 +842,44 @@ class AssetList: # We attempt to convert the year built to a datetime, by detecting the format and converting def extract_year(date_str): - """ - Extracts the year from a date string in the format '01-Jul-YYYY'. - Returns the extracted year as an integer or None if the format is incorrect. - """ - known_errors = [ + known_errors = { "#MULTIVALUE", + "ND", + "PIMSS EMPTY", + "UNKNOWN", "This cell has an external reference that can't be shown or edited. 
Editing this cell will " "remove the external reference.", - "ND", - 'PIMSS EMPTY', - "UNKNOWN" - ] + 0 + } - if pd.isnull(date_str) or date_str in known_errors or (date_str == 0): + if pd.isnull(date_str) or date_str in known_errors: return None - if isinstance(date_str, str): - match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str) - if match: - return int(match.group(1)) # Extract the year and convert to integer - if "-" in date_str: - - # Count the number of times we have "-", as we've seen double ranges - # (when we have extensions) so the format is like this: - # 'G: 1983-1990, H: 1991-1995' - if date_str.count("-") == 2: - # We have a range - return int(date_str.split("-")[1].split(",")[0]) - # We probably have a range - return int(date_str.split("-")[1].strip()) - + # Handle datetime if isinstance(date_str, datetime): return date_str.year - if isinstance(date_str, float): - if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4): + # Handle numeric year (float or int) + if isinstance(date_str, (int, float)): + if 1000 <= int(date_str) <= 2100: return int(date_str) - # Check if date_str is a year itself - if str(date_str).isdigit() & (len(str(date_str)) == 4): - return int(date_str) + # Now handle string-based logic + if isinstance(date_str, str): + # Direct date match e.g. 
01-Jul-2021 + match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str) + if match: + return int(match.group(1)) - # Remove any non-numeric characters - date_str = re.sub(r"\D", "", str(date_str)) - if str(date_str).isdigit() & (len(str(date_str)) == 4): - return int(date_str) + # Find all 4-digit years in string + years = [int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str)] + if years: + return max(years) # Return most recent year + + # If only numbers are present without format + numeric_str = re.sub(r"\D", "", date_str) + if len(numeric_str) == 4 and numeric_str.isdigit(): + return int(numeric_str) raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me") @@ -920,7 +999,7 @@ class AssetList: self.STANDARD_YEAR_BUILT, self.STANDARD_WALL_CONSTRUCTION, self.STANDARD_HEATING_SYSTEM, - self.STANDARD_EXISTING_PV + self.STANDARD_BLOCK_REFERENCE, ] if v not in self.standardised_asset_list.columns ] for v in missing_variables: @@ -931,6 +1010,38 @@ class AssetList: self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str) ) + # CLean up the standard SAP column, that can be problematic + if self.landlord_sap is not None: + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP] + .astype(str) + .str.replace('\xa0', ' ', regex=False) + .str.strip() + ) + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == "", + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP].astype(float) + ) + # If it's zero, we set it to None + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == 0, + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) + + has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of 
flats").sum() + + # Perform block splitting, ahead of fetching the EPC data + # If we blocks of flats, without a landlord block reference, we create this + self.fill_landlord_block_reference(has_blocks_of_flats) + + # If we have blocks of flats, we split these out into individual units. + self.split_blocks() + def merge_data(self, df: pd.DataFrame): """ Used to insert data into the standardised asset list, based on the domna property id @@ -1000,7 +1111,7 @@ class AssetList: num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS], floor_height=( float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if - x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5 + not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) else 2.5 ), perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER], built_form=x[self.EPC_API_DATA_NAMES["built-form"]] @@ -1147,7 +1258,7 @@ class AssetList: processed_age_band, how="left" ) - def identify_worktypes(self, cleaned): + def identify_worktypes(self): if self.landlord_sap is not None: # We add a SAP category for all work type identification @@ -1176,6 +1287,13 @@ class AssetList: ) ) + self.standardised_asset_list["SAP Category"] = np.where( + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) & + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + "SAP Unknown", + self.standardised_asset_list["SAP Category"] + ) + else: # We add a SAP category for all work type identification # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more) @@ -1196,13 +1314,24 @@ class AssetList: ), ) ) + self.standardised_asset_list["SAP Category"] = np.where( + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + "SAP Unknown", + self.standardised_asset_list["SAP Category"] + ) # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs - if self.non_intrusives_present: - existing_solar_non_intrusives_check = ( - 
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" - ) + if self.non_intrusives_present and not self.old_format_non_intrusives_present: + + if self.new_format_non_insturives_present_v2: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV" + ) + else: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" + ) elif self.old_format_non_intrusives_present: existing_solar_non_intrusives_check = ( self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( @@ -1426,13 +1555,22 @@ class AssetList: ) ) + # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the + # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the + # landlord data suggests otherwise (e.g. 
there's a gas boiler), we default to what the landlord has told us self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters|room heaters" + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( + "electric storage heaters|room heaters" + ) & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] != "Controls for high heat retention storage heaters" + ) ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] - ] != "Controls for high heat retention storage heaters" + ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["district heating", "communal heating", "communal gas boiler"] + ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ") ) ) @@ -1471,12 +1609,17 @@ class AssetList: # With this in mind, we look for 2 clases # 1) The property is fully insulated apart from the loft (<200mm insulation) # 2) THe property is fully insulated - - print("Should we include cavity properties where they might be uninsulated?") self.standardised_asset_list["solar_landlord_walls_insulated"] = ( self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( [ - "filled cavity", "insulated solid brick", "insulated timber frame", + "filled cavity", + "insulated solid brick", + "insulated timber frame", + "uninsulated cavity", + "insulated system built", + "insulated granite or whinstone", + "insulated sandstone or limestone", + "new build - average thermal transmittance" ] ) ) @@ -1501,19 +1644,9 @@ class AssetList: else: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False - # We merge on the u-value for average thermal transmittance - walls_uvalue_data = pd.DataFrame(cleaned["walls-description"]) - 
walls_uvalue_data = walls_uvalue_data[ - ~pd.isnull(walls_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["walls-description"], - "thermal_transmittance": "walls_u_value" - } - ) - self.standardised_asset_list = self.standardised_asset_list.merge( - walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"] - ) + self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None) self.standardised_asset_list["solar_epc_walls_insulated"] = ( ( @@ -1526,16 +1659,20 @@ class AssetList: ) ) - # We merge on the u-value for average thermal transmittance - roof_data = pd.DataFrame(cleaned["roof-description"])[ - ["original_description", "thermal_transmittance", "is_pitched", "is_loft"] - ].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["roof-description"], - "thermal_transmittance": "roof_u_value", - } - ) - + roof_data = [] + for desc in self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["roof-description"] + ].unique(): + if pd.isnull(desc): + continue + roof_data.append( + { + self.EPC_API_DATA_NAMES["roof-description"]: desc, + **RoofAttributes(desc).process() + } + ) + roof_data = pd.DataFrame(roof_data) + roof_data = roof_data.rename(columns={"thermal_transmittance": "roof_u_value"}) self.standardised_asset_list = self.standardised_asset_list.merge( roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"] ) @@ -1683,10 +1820,10 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = None empty_cavity_map = { - "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ", - "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property " + 
"non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ", + "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " "already has solar: ", - "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, " + "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", } @@ -1711,7 +1848,7 @@ class AssetList: )) & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1723,7 +1860,7 @@ class AssetList: self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show filled or other: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1735,7 +1872,7 @@ class AssetList: (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1747,8 +1884,7 @@ class AssetList: (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list[ - "SAP Category"], + f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + 
self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) else: @@ -1758,7 +1894,7 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity: " + self.standardised_asset_list["SAP Category"], + f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1768,10 +1904,12 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[ - "SAP Category"], + f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) + + # Work type prefixes + # Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and # inspections show filled self.standardised_asset_list["cavity_reason"] = np.where( @@ -1781,7 +1919,7 @@ class AssetList: ~self.standardised_asset_list["epc_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " + + f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1792,7 +1930,7 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"], + f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1801,7 +1939,7 @@ class AssetList: 
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + + f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1814,11 +1952,9 @@ class AssetList: # Map of variables and fill values for the solar_reason variable # ordering of this map is important, where we flag our prioritised work types first solar_reason_map = { - "solar_eligible": "Solar Eligible: ", - "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ", - "solar_eligible_needs_heating_upgrade": ( - "Solar Eligible, Needs Heating Upgrade: " - ) + "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ", + "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ", + "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: " } for variable, reason in solar_reason_map.items(): @@ -1864,22 +2000,54 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["submission_date"])) + (~pd.isnull(self.standardised_asset_list["submission_status"])) ), None, self.standardised_asset_list[col] ) - if self.ecosurv is not None: + if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns: + # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_reference"])) + (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"])) ), None, self.standardised_asset_list[col] ) + # We prepare outcomes for output 
+ if self.outcomes is not None: + logger.info("Preparing outcomes for output") + identified_work = self.standardised_asset_list[ + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | + ~pd.isnull(self.standardised_asset_list["solar_reason"]) + ][self.DOMNA_PROPERTY_ID].values + + if self.DOMNA_PROPERTY_ID in self.outcomes.columns: + self.outcomes_for_output = self.outcomes[ + self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work) + ] + + # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of + # 76 or above, we should exclude it because it's likely not going to be eligible for anyting + self.standardised_asset_list["cavity_reason"] = np.where( + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & + (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), + self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)", + self.standardised_asset_list["cavity_reason"] + ) + + # Split cavity_reason on the colon and check if the first part is equal to one of the two options above + # that indicates empties + self.standardised_asset_list["identified_empty_cavity"] = ( + self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( + [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY] + ) + ) + + def get_work_figures(self): blocks_of_flats = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" ] @@ -1897,56 +2065,311 @@ class AssetList: }, **self.standardised_asset_list["solar_reason"].value_counts().to_dict() } + pprint(self.work_type_figures) - # We prepare outcomes for output - if self.outcomes is not None: - logger.info("Preparing outcomes for output") - identified_work = self.standardised_asset_list[ - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | - ~pd.isnull(self.standardised_asset_list["solar_reason"]) - ][self.DOMNA_PROPERTY_ID].values 
+ def fill_landlord_block_reference(self, has_blocks_of_flats): + if not has_blocks_of_flats: + return - if self.DOMNA_PROPERTY_ID in self.outcomes.columns: - self.outcomes_for_output = self.outcomes[ - self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work) - ] - - def flat_analysis(self): - - # We need to deduce the building name - we strip out the house number - - # We want to deduce if flats have 50% of the properties below C75 - # We group by postcode and property type - grouped = self.standardised_asset_list.groupby( - [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE] + # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & ( + pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) + ), + self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " + + self.standardised_asset_list[self.STANDARD_POSTCODE], + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] ) - flat_data = [] - for _, group in grouped: - if "flat" in group[self.STANDARD_PROPERTY_TYPE].values: - num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0] - num_below_c75 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum() - # Check if any flats are below C69 - num_flats_below_c69 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(69).sum() + def split_blocks(self): + """ + Where we have a single row that is a block of flats, we split this into multiple rows, + one for each unit. 
The data that we have will be copied across rows + :return: + """ - flat_data.append( - { - "Postcode": group[self.STANDARD_POSTCODE].iloc[0], - "Property Type": "Flat", - "Number of Flats with EPC": num_flats, - "Number of Flats below C75": num_below_c75, - "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats), - "Number of Flats Below C69": num_flats_below_c69, - } + blocks = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + ].copy() + + if blocks.empty: + return + + RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') + NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. + + expanded_rows = [] + + for _, row in blocks.iterrows(): + addr = str(row[self.STANDARD_ADDRESS_1]) + full_addr = row[self.STANDARD_FULL_ADDRESS] + + # We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just + # the odds, evens or all of the numbers + has_odd = ( + "(odd)" in addr.lower() or + "(odd)" in full_addr.lower() or + "(odds)" in addr.lower() or + "(odds)" in full_addr.lower() + ) + has_even = ( + "(even)" in addr.lower() or + "(even)" in full_addr.lower() or + "(evens)" in addr.lower() or + "(evens)" in full_addr.lower() + ) + + # 1 ─ Range (e.g. 
1-7) + m_range = RANGE_RE.search(addr) + if m_range: + + start, end = m_range.groups() + start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) + if start > end or (end - start) > 100: + raise ValueError(f"Suspicious range '{addr}'") + + # We define the looping range on whether we have odd, even or all numbers + house_number_range = range(start, end + 1) + if has_odd: + house_number_range = [x for x in house_number_range if x % 2 != 0] + if has_even: + house_number_range = [x for x in house_number_range if x % 2 == 0] + + for n in house_number_range: + new = row.copy() + new_addr = RANGE_RE.sub(str(n), addr, count=1) + original_full_address = new[self.STANDARD_FULL_ADDRESS] + new_full_address = original_full_address.replace(addr, new_addr) + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.STANDARD_FULL_ADDRESS] = new_full_address + new[self.STANDARD_PROPERTY_TYPE] = "flat" + # Keep a record of the previous address 1 + new["block_address1"] = addr + new["block_full_address"] = original_full_address + new["is_expended_block"] = True + # We update the full address + + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 
1 & 2 Block) + nums = NUM_RE.findall(addr) + if len(nums) > 1 and (',' in addr or '&' in addr): + for n in nums: + new = row.copy() + new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 3 ─ Single number or no number, treat as individual dwelling + if (len(nums) == 1) or not nums: + expanded_rows.append(row) + continue + + # Anything else with digits is unrecognised + raise NotImplementedError(f"Unhandled block format: '{addr}'") + + expanded_blocks = pd.DataFrame(expanded_rows) + + # We drop the blocks from the standardised asset list and append on the expanded blocks + self.standardised_asset_list = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" + ] + + self.standardised_asset_list = pd.concat( + [self.standardised_asset_list, expanded_blocks], + ignore_index=True + ) + + # As a final clean up, for any blocks that are size 1, we don't includr a project code + sizes = ( + expanded_blocks + .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID] + .nunique() + .reset_index() + ) + size_1 = sizes[sizes[self.DOMNA_PROPERTY_ID] <= 1] + # Remove the size 1 blocks from the standardised asset list + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + size_1[self.STANDARD_BLOCK_REFERENCE].values + ), + None, + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ) + + def label_property_status(self): + """ + This function is designed to be run after identify_worktypes() has been run, and will create a "property_status" + column, which will note where each property is (to be surveyed, surveyed, installed), using the stages we + recognise within hubspot + :return: + """ + + # For anything that is ready to go, that gets set 
to ready to be scheduled + self.standardised_asset_list["hubspot_status"] = np.where( + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | + ~pd.isnull(self.standardised_asset_list["solar_reason"]), + hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label, + None + ) + + # we step through the process of flagging completed surveys + + # We utilise submissions, ecosurv and outcomes to define the hubspot status + # We'll take the maximum of these three columns, based on the enum integer value + label_to_enum = {e.label: e for e in hubspot_config.HubspotProcessStatus} + + def get_max_status_from_columns(row): + status_candidates = [] + for col in ["submission_status", "ecosurv_install_status", "outcome_status"]: + label = row.get(col) + if label in label_to_enum: + status_candidates.append(label_to_enum[label]) + if not status_candidates: + return row["hubspot_status"] # fallback to existing status if no updates + return max(status_candidates).label + + self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply( + get_max_status_from_columns, axis=1 + ) + + self.standardised_asset_list["project_code"] = None + # if we have any blocks, where work is eligible, we flag them now + # These blocks may be refecence via the landlord_block_reference field, or by property types being + # blocks of flats + has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])) + + if has_landlord_block_reference: + # For blocks that have a 50% allocation, we create project codes + self.block_analysis() + # find any block refs with more than 50% emptires + viable_empty_blocks = self.block_analysis_df[ + self.block_analysis_df['Percentage of Empties'] >= 0.50 + ] + + if not viable_empty_blocks.empty: + project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() + self.standardised_asset_list = self.standardised_asset_list.merge( + project_code_lookup, how="left", 
left_on=self.STANDARD_BLOCK_REFERENCE, right_on="Block Reference" ) + self.standardised_asset_list["project_code"] = np.where( + ~pd.isnull(self.standardised_asset_list["Block Reference"]), + self.standardised_asset_list["Block Reference"], + self.standardised_asset_list["project_code"] + ) + self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"]) - flat_data = pd.DataFrame(flat_data) + def analyse_geographies(self): + cavity_programme = ( + self.standardised_asset_list[["domna_postcode", "cavity_reason"]] + .groupby(["domna_postcode"])["cavity_reason"] + .count() + .reset_index() + ) + solar_programme = ( + self.standardised_asset_list[["domna_postcode", "solar_reason"]] + .groupby(["domna_postcode"])["solar_reason"] + .count() + .reset_index() + ) + postcodes = ( + self.standardised_asset_list[["domna_postcode", "landlord_property_id"]] + .groupby("domna_postcode")["landlord_property_id"] + .count() + .reset_index() + .rename(columns={"landlord_property_id": "n_properties"}) + ) + geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge( + solar_programme, how="left", on="domna_postcode" + ).fillna(0) + geographical_areas["coverage"] = ( + ( + geographical_areas["solar_reason"] + geographical_areas["cavity_reason"] + ) / geographical_areas["n_properties"] * 100 + ) - self.flat_data = flat_data + geographical_areas = geographical_areas.sort_values("coverage", ascending=False) + self.geographical_areas = geographical_areas + + def block_analysis(self): + + # Reverse mapping: label -> enum + LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} + + # Threshold status - anything that is at this stage or beyond is considered surveyed + threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value + + block_analysis = [] + for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + + cavity_breakdown = 
group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 + + if all(cavity_breakdown.index == "No Eligibility"): + continue + + # We check the % of empty vs not empty as right now, we're focused on empty + n_empties = ( + (group["identified_empty_cavity"] == True) & + (~pd.isnull(group["cavity_reason"])) & + (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + ).sum() + + works = group["hubspot_status"] + above_threshold = works.map(LABEL_TO_ENUM.get).dropna() + count_above = (above_threshold >= threshold).sum() + proportion_surveyed = count_above / len(works) + proportion_empty = n_empties / len(works) + # We auto-populate any blocks that have greater than 50% proportion empty + + block_analysis.append( + { + "Block Reference": block_reference, + "Proportion of properties suryeyed": proportion_surveyed, + "Percentage of Empties": proportion_empty, + **cavity_breakdown.to_dict(), + } + ) + + block_analysis = pd.DataFrame(block_analysis) + block_analysis = block_analysis.fillna(0) + + # We flag which properties are eligible for works. 
We need at least 50% + block_analysis["Eligible for Works"] = ( + block_analysis["Percentage of Empties"] >= 0.50 + ) + block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False) + + # For properties that are NOT eligible, we should update the cavity reason + ineligible_blocks = block_analysis[ + ~block_analysis["Eligible for Works"] + ]["Block Reference"].values + + eligible_blocks = block_analysis[ + block_analysis["Eligible for Works"] + ]["Block Reference"].values + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks), + self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list["cavity_reason"] + ) + + # if the property is in a block of flats that eligible, but the property itself is not eligible, we flag this + # The criteria is: + # =The property should be in a block of flats + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), + self.standardised_asset_list["cavity_reason"] + + " " + "(Flat in block with more than 50% eligible)", + self.standardised_asset_list["cavity_reason"] + ) + + self.block_analysis_df = block_analysis @staticmethod def split_full_name(x): @@ -1970,6 +2393,8 @@ class AssetList: sheet_name, landlord_property_id, phone_number_column=None, + secondary_phone_number_column=None, + secondary_contact_full_name=None, email_column=None, fullname_column=None, firstname_column=None, @@ -1979,6 +2404,8 @@ class AssetList: self.contact_detail_fields = { "landlord_property_id": landlord_property_id, "phone_number": phone_number_column, + "secondary_phone_number": secondary_phone_number_column, + "secondary_contact_full_name": secondary_contact_full_name, "email": email_column, "fullname": fullname_column, "firstname": firstname_column, @@ -1986,12 +2413,18 @@ class 
AssetList: } details_colnames = [ - phone_number_column, email_column, fullname_column, firstname_column, lastname_column + phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column, + lastname_column ] # We'll fill them none_details = [x for x in details_colnames if x is None] details_colnames = [x for x in details_colnames if x is not None] + if local_filepath is None: + # Create an empty DataFrame based on the fields in self.contact_detail_fields + self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys())) + return + contact_details = pd.read_excel( local_filepath, sheet_name=sheet_name )[[self.contact_detail_fields["landlord_property_id"]] + details_colnames] @@ -2007,68 +2440,117 @@ class AssetList: *contact_details[fullname_column].apply(self.split_full_name) ) else: - raise NotImplementedError("Implement me") + contact_details["title"] = None self.contact_details = contact_details - def prepare_for_crm(self, company_domain, crm_pipeline_name, first_dealstage, assigned_surveyors): + @classmethod + def load_standardised_asset_list(cls, filepath, sheet_name, header): """ - This function prepares the data for upload into Hubspot + This function is designed to load the standardised asset list from a file :return: """ # This is a placeholder for now + # instantiate the class + instance = cls( + local_filepath=filepath, + sheet_name=sheet_name, + address1_colname=cls.STANDARD_ADDRESS_1, + postcode_colname=cls.STANDARD_POSTCODE, + full_address_colname=cls.STANDARD_FULL_ADDRESS, + landlord_property_id=cls.STANDARD_LANDLORD_PROPERTY_ID, + full_address_cols_to_concat=[], + missing_postcodes_method=None, + address1_extraction_method=None, + landlord_year_built=cls.STANDARD_YEAR_BUILT, + landlord_uprn=cls.STANDARD_UPRN, + landlord_property_type=cls.STANDARD_PROPERTY_TYPE, + landlord_built_form=cls.STANDARD_BUILT_FORM, + landlord_wall_construction=cls.STANDARD_WALL_CONSTRUCTION, + 
landlord_roof_construction=cls.STANDARD_ROOF_CONSTRUCTION, + landlord_heating_system=cls.STANDARD_HEATING_SYSTEM, + landlord_existing_pv=cls.STANDARD_EXISTING_PV, + landlord_sap=cls.STANDARD_SAP, + landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE, + phase=False, + header=header + ) + return instance + def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False): + """ + This function prepares the data for upload into Hubspot + :param company_domain: The company domain name to be used in the CRM + :param installer_name: The name of the installer to be used in the CRM + :param reconcile_programme: If True, will include all properties with a project code, regardless of status + :raises ValueError: If the installer name is not valid or if there are missing products + :return: + """ # This maps the opportunities as we reference them, to the product data as stored in Hubspot - product_lookup_table = { - "Non-Intrusive Data Showed Cavity Extraction": { - "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500 - }, - "Non-Intrusive Data Showed Empty Cavity": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": { - "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500 - }, - "EPC Data Showed Empty Cavity": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Solid Floor, Insulated, No Solar": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Solid Floor, Insulated, Needs Loft": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Other Floor, Insulated, No Solar": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Other Floor, Insulated, Needs 
Loft": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - } - } + if not hubspot_config.Installer.is_valid_value(installer_name): + raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.") + # We check if all products are covered in the lookup table - cavity_products = self.standardised_asset_list["cavity_reason"].unique() - solar_products = self.standardised_asset_list["solar_reason"].unique() - # Check if there any options not in out lookup table - if ( - any(x for x in cavity_products if x not in product_lookup_table) or - any(x for x in solar_products if x not in product_lookup_table) - ): - raise ValueError("We have products not referenced in the lookup table - check this") + cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() + cavity_products = [x for x in cavity_products if not pd.isnull(x)] + solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() + solar_products = [x for x in solar_products if not pd.isnull(x)] + + product_map = {} + for identified_product in cavity_products + solar_products: + if pd.isnull(identified_product): + continue + + matched_product = None + for product_prefix, crm_product in self.prefixes_to_products.items(): + if identified_product.startswith(product_prefix): + matched_product = crm_product + + product_map[identified_product] = matched_product + + # For each cavity and solar product, we iterate through the prexies and map to the products programme_data = self.standardised_asset_list.copy() + programme_data["domna_full_address"] = ( + programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace(" ", "") + ) - # Exclusions - these are properties we won't treat for the moment - product_exclusions = [ - "Other Floor, Insulated, No Solar", - "Other Floor, Insulated, Needs Loft" - ] - if product_exclusions: - logger.warning("Excluding products: %s", product_exclusions) + # Format the two date 
columns + programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce") + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], + errors="coerce" + ) + # Convert to dd/mm/yyyy format + programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y") + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") + ) - programme_data = programme_data[programme_data["solar_reason"].isin(product_exclusions) == False] + # We take rows that have a survyor and a date for the survey + # We include properties under 2 circumstances: + # 1) The hubspot status is ready to be scheduled and there is an assigned surveyor and week for survey + # 2) The hubspot status is something else, meaning this has been included in an existing programme + # 3) reconcile programme is true, and therefore all proeprties with a project code will be included + + if reconcile_programme: + programme_data = programme_data[~pd.isnull(programme_data["project_code"])] + else: + + if programme_data["hubspot_status"].nunique() > 1: + logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?") + + ready_to_be_scheduled = ( + ( + programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label + ) + ) + # completed_works = ( + # (programme_data["hubspot_status"] != + # hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) & + # (~pd.isnull(programme_data["hubspot_status"])) + # ) + programme_data = programme_data[ready_to_be_scheduled] # Merge on the contact details programme_data = programme_data.merge( @@ -2081,26 +2563,47 @@ class AssetList: programme_data["Company Domain Name "] = company_domain # Append the product data onto the programme data programme_data["cavity_product"] = 
programme_data["cavity_reason"].map( - lambda x: product_lookup_table.get(x, {"name": None})["name"] + lambda x: product_map.get(x, {"name": None})["name"] ) programme_data["solar_product"] = programme_data["solar_reason"].map( - lambda x: product_lookup_table.get(x, {"name": None})["name"] + lambda x: product_map.get(x, {"name": None})["name"] ) - programme_data["domna_product"] = programme_data["solar_reason"].copy() + # We check if we have any missings + cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum() + solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum() + + if cavity_missing > 0 or solar_missing > 0: + raise ValueError( + f"We have {cavity_missing} cavity products and {solar_missing} solar products that are not " + "mapped to a product in the lookup table. Please check the mapping." + ) + + programme_data["domna_product"] = programme_data["solar_product"].copy() programme_data["domna_product"] = np.where( pd.isnull(programme_data["domna_product"]), - programme_data["solar_product"], + programme_data["cavity_product"], programme_data["domna_product"] ) # We filter just on rows where we have a product - programme_data = programme_data[ - ~pd.isnull(programme_data["domna_product"]) - ] + if reconcile_programme: + # We include historical works, which will include hisorical cavity so we set these as extraction (as + # this is the main work mix) + programme_data["domna_product"] = programme_data["domna_product"].fillna( + self.CRM_HISTORICAL_CAVITY_PRODUCT["name"] + ) + else: + # We shouldn't have any missing products + # programme_data = programme_data[ + # ~pd.isnull(programme_data["survey_date"]) + # ] + + if pd.isnull(programme_data["domna_product"]).sum(): + raise ValueError("Missing products") programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( - pd.DataFrame(product_lookup_table).T[["name", "id", 
"unit_price"]] + pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] .reset_index() .rename( columns={ @@ -2115,28 +2618,105 @@ class AssetList: product_df['Quantity '] = 1 # Append on the product data - programme_data = programme_data.merge( - product_df, - how="left", - on="domna_product", - ) + programme_data = programme_data.merge(product_df, how="left", on="domna_product") # Add in deal and pipeline information - programme_data["dealname"] = programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data[ - "domna_product"] - programme_data['Pipeline '] = crm_pipeline_name - programme_data['Deal Stage '] = first_dealstage + programme_data["dealname"] = ( + programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"] + ) + programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME programme_data['Associations: Listing'] = "Property Owner" - programme_data = programme_data.merge( - assigned_surveyors.rename( - columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID} - ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID + # We determine which column we should use for the UPRN + if self.STANDARD_UPRN not in programme_data.columns: + uprn_column = self.EPC_API_DATA_NAMES["uprn"] + # If we're working form the EPC, we don't have this information if the EPC is estimated + programme_data[uprn_column] = np.where( + programme_data["estimated"] == True, None, programme_data[uprn_column] + ) + else: + # Use the value that has the most coverage + uprn_column = "hubspot_uprn" + programme_data[uprn_column] = programme_data[self.STANDARD_UPRN].fillna( + programme_data[self.EPC_API_DATA_NAMES["uprn"]] + ) + + # Remove any negative URPSN which are not valid + programme_data[uprn_column] = np.where( + programme_data["estimated"].isin([1, True]), + None, + programme_data[uprn_column] ) + # Add in some columns if we have them + date_of_inspections = ( + "Non-Intrusives: Date of Inspection" if + "Non-Intrusives: Date of 
Inspection" in programme_data.columns else None + ) + + # Ammend the property type and built form columns + programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy() + programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy() + + def _replace_property_description_data(programme_data, column_name): + """ + Helper function to replace property type or built form data with a specified value. + """ + + if column_name == "hubspot_property_type": + valid_values = ["house", "bungalow", "flat", "maisonette"] + epc_fill_col = "property-type" + elif column_name == "hubspot_built_form": + valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"] + epc_fill_col = "built-form" + else: + raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " + f"'hubspot_built_form'.") + + # Any vakue that is not house, bungalow, flat or maisonette is set to None + programme_data[column_name] = np.where( + ~programme_data[column_name].isin(valid_values), + None, + programme_data[column_name] + ) + # We fill with the EPC property type + programme_data[column_name] = np.where( + pd.isnull(programme_data[column_name]), + programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]], + programme_data[column_name] + ) + + programme_data[column_name] = programme_data[column_name].fillna("unknown") + + return programme_data + + # Clean up the property type and built form columns + programme_data = _replace_property_description_data(programme_data, "hubspot_property_type") + programme_data = _replace_property_description_data(programme_data, "hubspot_built_form") + + # We accomodate the old vs new inspections format + if "non-intrusives: WFT Findings" in programme_data.columns: + # We have the old format - we only have notes + non_intrusives_surveyor_notes = "non-intrusives: WFT Findings" + non_intrusives_construction = None + non_intrusives_insulated = None + non_intrusives_insulation_material = 
None + non_intrusives_ciga_check_required = None + non_intrusives_pv_access = None + non_intrusives_roof_orientation = None + non_intrusives_surveyor_name = None + else: + non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes' + non_intrusives_construction = "non-intrusives: Construction" + non_intrusives_insulated = "non-intrusives: Insulated" + non_intrusives_insulation_material = "non-intrusives: Material" + non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required' + non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' + non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION' + non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name' + # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged schema_mappings = { - 'Name ': self.DOMNA_PROPERTY_ID, # TODO: Maybe change this? 'Company Domain Name ': 'Company Domain Name ', 'Email ': ( self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None @@ -2150,49 +2730,42 @@ class AssetList: 'Phone ': ( self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None ), # TODO: Review + 'Secondary Phone ': ( + self.contact_detail_fields["secondary_phone_number"] if + self.contact_detail_fields["secondary_phone_number"] else None + ), + "Secondary Contact Full Name ": ( + self.contact_detail_fields["secondary_contact_full_name"] if + self.contact_detail_fields["secondary_contact_full_name"] else None + ), 'Full Address ': self.STANDARD_FULL_ADDRESS, 'Address 1 ': self.STANDARD_ADDRESS_1, 'Address 2 ': None, # TODO: Don't have this for the moment 'Postcode ': self.STANDARD_POSTCODE, - 'Property Type ': self.STANDARD_PROPERTY_TYPE, - 'Property Sub Type ': None, # TODO: Don't have this for the moment + 'Property Type ': "hubspot_property_type", + 'Property Sub Type ': "hubspot_built_form", 'Bedroom(s) ': None, # TODO: Don't have this for 
the moment 'Domna Property ID ': self.DOMNA_PROPERTY_ID, - 'National UPRN ': ( - self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"] - ), + # We populate this with the column that we have + 'National UPRN ': uprn_column, 'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID, 'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION, 'Heating System ': self.STANDARD_HEATING_SYSTEM, 'Year Built ': self.STANDARD_YEAR_BUILT, 'Boiler Make ': None, # TODO: Don't have this for the moment 'Boiler Model ': None, # TODO: Don't have this for the moment - 'Non-Intrusives: Date Checked ': None, - # TODO: Don't have this for the moment - 'Non-Intrusives: Wall Type ': ( - "non-intrusives: Construction" if self.non_intrusives_present else None - ), - 'Non-intrusives: Insulation ': ( - "non-intrusives: Insulated" if self.non_intrusives_present else None - ), - 'Non-intrusives: Insulation Material ': ( - "non-intrusives: Material" if self.non_intrusives_present else None - ), - 'Non-Intrusives: CIGA Check Required ': ( - 'non-intrusives: CIGA Check Required' if self.non_intrusives_present else None - ), - 'Non-Intrusives: PV Access Issues ': ( - 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Roof Orientation ': ( - 'non-intrusives: OFF GAS - ROOF ORIENTATION' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Surveyor Notes ': ( - 'non-intrusives: Any further surveyor notes' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Surveyor Name ': ( - 'non-intrusives: Surveyors Name' if self.non_intrusives_present else None - ), + 'Non-Intrusives: Date Checked ': date_of_inspections, + 'Non-Intrusives: Wall Type ': non_intrusives_construction, + 'Non-intrusives: Insulation ': non_intrusives_insulated, + 'Non-intrusives: Insulation Material ': + non_intrusives_insulation_material, + 'Non-Intrusives: CIGA Check Required ': + non_intrusives_ciga_check_required, + 
'Non-Intrusives: PV Access Issues ': non_intrusives_pv_access, + 'Non-Intrusives: Roof Orientation ': + non_intrusives_roof_orientation, + 'Non-Intrusives: Surveyor Notes ': non_intrusives_surveyor_notes, + 'Non-Intrusives: Surveyor Name ': non_intrusives_surveyor_name, 'CIGA: Date Requested ': None, # TODO: Don't have this for the moment 'CIGA: Cavity Guarantee Found ': None, 'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"], @@ -2209,18 +2782,24 @@ class AssetList: 'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"], 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], - 'Deal Stage ': 'Deal Stage ', 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': None, # TODO: Need to set this, + 'Expected Commencement Date ': "survey_date", 'Deal Name ': "dealname", # Need to create this, 'Product ID ': 'Product ID ', 'Name ': 'Name ', 'Unit price ': 'Unit price ', 'Quantity ': 'Quantity ', - 'Deal Owner': 'surveyor_email', - 'Amount ': 'Unit price ', + 'Deal Owner': 'surveyor', + 'Project Code ': 'project_code', + 'Associations: Listing': 'Associations: Listing', + 'Deal Stage ': "hubspot_status", } + # We sometimes columns if the landlord never provided them + missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None] + for c in missed_mapping_cols: + programme_data[c] = None + # We now create the finalised dataset to be uploaded into Hubspot variables_required = list(schema_mappings.values()) variables_required = [v for v in variables_required if v is not None] @@ -2235,6 +2814,28 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) + programme_data['Postcode '] = programme_data['Postcode '].copy() + programme_data['Installer '] = installer_name + programme_data['Name '] = ( + programme_data['Full Address '] + " ," + programme_data['Postcode '] + ) + # The listing 
owner email is the same as the surveyor email (deal owner), so they can see the listing + programme_data['Listing Owner Email '] = programme_data['Deal Owner'] + programme_data['Amount '] = 0 + programme_data["Deal Owner"] = np.where( + ~pd.isnull(programme_data["Deal Owner"]), + programme_data["Deal Owner"].astype(str).str.lower(), + programme_data["Deal Owner"] + ) + + # We make sure we have all of the columns that we need + missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] + if missed_columns: + raise ValueError( + f"We have the following columns that are not in the programme data: {missed_columns}. " + "Please check the mapping and ensure all required columns are present." + ) + self.hubspot_data = programme_data def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None): @@ -2324,13 +2925,63 @@ class AssetList: logger.info("Matched %s properties to ecosurv data", len(matched)) logger.info("%s properties in Ecosurv remain unmatched", len(unmatched)) - # We now match + if not matched: + return + + # We now match matched = pd.DataFrame(matched) # We'll possibly have duplicates here, where properties have been sold twice. 
Ww de-dupe if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum(): # It doesn't matter too much which record we take matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + # We merge on the status of the property + matched = matched.merge( + self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename( + columns={ + "Reference": "ecosurv_reference", + "Status": "ecosurv_status", + "Lead Status": "ecosurv_lead_status", + "Tags": "ecosurv_tags", + "Installer": "ecosurv_installer" + } + ), how="left", on="ecosurv_reference" + ) + + matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + + # This mapping is ordered by process order, where lodgment is the final step so if we have an indication + # that the property is ready for lodgement, we set the status to that. We then proceed through the other + # statuses where the penultimate status is install complete + mapping = { + "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED, + "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE, + "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Sold": 
hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + } + + def get_max_status(tag_str): + if pd.isna(tag_str): + return None + matched_statuses = [] + for tag, status in mapping.items(): + if tag in tag_str: + matched_statuses.append(status) + if not matched_statuses: + return None + return max(matched_statuses).label + + matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status) + self.standardised_asset_list = self.standardised_asset_list.merge( matched, how="left", @@ -2362,7 +3013,7 @@ class AssetList: outcomes["row_id"] = outcomes.index if outcomes_houseno[idx] is None: - outcomes_houseno = "houseno" + outcomes_houseno[idx] = "houseno" outcomes["houseno"] = outcomes[outcomes_address[idx]].apply( lambda x: SearchEpc.get_house_number(x, outcomes[outcomes_postcode]) ) @@ -2380,7 +3031,7 @@ class AssetList: # Perform the remap outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary) - outcomes["Outcome"] = outcomes["Outcome"].str.lower() + outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip() logger.info("Matching outcomes to asset list") # Merge the outcomes onto the asset list - we check we're able to match sufficiently well @@ -2507,7 +3158,7 @@ class AssetList: else: raise NotImplementedError("Invalid date in outcomes - implement me") - notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes" + notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes" lookup = lookup.merge( self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id" @@ -2542,12 +3193,13 @@ class AssetList: apply(get_latest_note). 
reset_index(drop=True) ) - latest_note = latest_note[["domna_property_id", notes_col]] + latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename( + columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"} + ) pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index() - pivot_df = pivot_df.merge( - visit_counts, how="left", on="domna_property_id" - ) + pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id") + pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id") # We want the latest note @@ -2558,15 +3210,32 @@ class AssetList: self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id") + # We flag the outcome status, based on the outcome + pivot_df["outcome_status"] = None + + if "surveyed" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + pivot_df["outcome_status"] + ) + + if "installer refusal" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, + pivot_df["outcome_status"] + ) + + pivot_df["outcome_status"] = np.where( + pivot_df["latest_outcome"].isin(["see notes"]) & + (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label), + hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label, + pivot_df["outcome_status"] + ) + # We merge out pivoted outcomes onto the asset list self.standardised_asset_list = self.standardised_asset_list.merge( pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" ) - # Merge the latest note - self.standardised_asset_list = self.standardised_asset_list.merge( - 
latest_note.rename(columns={notes_col: "Latest Route March Note"}), - how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" - ) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): raise ValueError("Duplicates appreared - something went wrong") @@ -2576,6 +3245,7 @@ class AssetList: def flag_survey_master( self, master_filepaths, + master_id_colnames, master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion @@ -2591,7 +3261,7 @@ class AssetList: logger.info("Getting masters and merging onto asset list") master_surveyed = [] unmatched_submissions = [] - for filepath in master_filepaths: + for idx, filepath in enumerate(master_filepaths): master_data = pd.read_csv(filepath) # Strip columns master_data.columns = [c.strip() for c in master_data.columns] @@ -2611,28 +3281,21 @@ class AssetList: install_col = "INSTALL / CANCELLATION DATE" elif 'INSTALL/ CANCELLATION DATE' in master_data.columns: install_col = 'INSTALL/ CANCELLATION DATE' + elif "INSTALL/CANCELLATION DATE" in master_data.columns: + install_col = "INSTALL/CANCELLATION DATE" + elif 'Measure 1 Install Date' in master_data.columns: + install_col = 'Measure 1 Install Date' else: raise ValueError("No install or cancellation date") - submission_col = ( - "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" - ) - - # if "UPRN" in master_data.columns: - # # We just need to check if any were cancelled - # master_to_append = master_data[ - # ["UPRN", install_col, submission_col] - # ].rename( - # columns={ - # "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, - # install_col: "survey_status", - # submission_col: "submission_date" - # } - # ) - # master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") - # - # master_surveyed.append(master_to_append) - # continue + if "SUBMISSION DATE" in master_data.columns: + submission_col = "SUBMISSION DATE" + elif "SUBMISSION 
DATE TO INSTALLERS" in master_data.columns: + submission_col = "SUBMISSION DATE TO INSTALLERS" + elif "Submission Date" in master_data.columns: + submission_col = "Submission Date" + else: + raise ValueError("No submission date column found in master data") master_data["row_id"] = master_data.index @@ -2643,21 +3306,55 @@ class AssetList: axis=1 ) - scheme_col = ( - "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if - "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH" - ) - postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" - house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO" - property_type_col = ( - "PROPERTY TYPE As per table emailed" if - "PROPERTY TYPE As per table emailed" in - master_data.columns else "PROPERTY TYPE As per table emailed" - ) - measure_mix_col = "MEASURE COMBO" + if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns: + scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" + elif "AFFORDABLE WARMTH" in master_data.columns: + scheme_col = "AFFORDABLE WARMTH" + elif "Scheme" in master_data.columns: + scheme_col = "Scheme" + elif "Affordable Warmth" in master_data.columns: + scheme_col = "Affordable Warmth" + else: + scheme_col = "OFFICE USE ONLY" + + postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" + if 'NO.' in master_data.columns: + house_no_col = 'NO.' + elif "NO" in master_data.columns: + house_no_col = 'NO' + else: + house_no_col = "NUMBER" + + if "PROPERTY TYPE As per table emailed" in master_data.columns: + property_type_col = "PROPERTY TYPE As per table emailed" + elif "PROPERTY TYPE As per table emailed" in master_data.columns: + property_type_col = "PROPERTY TYPE As per table emailed" + elif "PROPERTY TYPE" in master_data.columns: + property_type_col = "PROPERTY TYPE" + else: + property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 
3W_Flat_1 (As per Matrix)" + + if "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" in master_data.columns: + installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" + elif "INSTALLERS NOTES" in master_data.columns: + installer_notes_col = "INSTALLERS NOTES" + elif 'Installers Notes' in master_data.columns: + installer_notes_col = 'Installers Notes' + elif 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' in master_data.columns: + installer_notes_col = 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' + else: + raise ValueError("No installer notes column found in master data") + + if "INSTALLER" in master_data.columns: + installer_col = "INSTALLER" + elif "Installer" in master_data.columns: + installer_col = "Installer" + else: + raise ValueError("No installer column found in master data") + + measure_mix_col = "MEASURE COMBO" + town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' - # Otherwise, we need to match algorithmically - has_property_id = "UPRN" in master_data.columns logger.info("Matching master data to asset list") matched = [] unmatched = [] @@ -2670,13 +3367,22 @@ class AssetList: if pd.isnull(row[postcode_col]): continue - # if has_property_id: - # submission_uprn = row["UPRN"] - # - # if not pd.isnull(submission_uprn): - # df = self.standardised_asset_list[ - # self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn - # ] + if master_id_colnames[idx] is not None: + # Filter the standardised asset list on this + df = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] + ] + if df.shape[0] == 1: + matched.append( + { + "row_id": row["row_id"], + "original_house_no": original_house_no, + "original_street": original_street, + "original_postcode": original_postcode, + self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + } + ) + continue 
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower() @@ -2688,6 +3394,10 @@ class AssetList: ] house_no = row[house_no_col] + + if pd.isnull(house_no): + house_no = None + if isinstance(house_no, (float, int)): house_no = str(int(house_no)) @@ -2721,6 +3431,7 @@ class AssetList: self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], } ) + continue if house_no in df["house_no"].values: df = df[df["house_no"] == house_no] @@ -2736,7 +3447,8 @@ class AssetList: df = df[ df[self.STANDARD_FULL_ADDRESS].str.lower().apply( lambda x: process.extractOne( - " ".join([row[house_no_col], row["Street / Block Name"], row["TOWN"]]).lower(), + " ".join( + [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(), x )[1] ) > 90 @@ -2781,18 +3493,34 @@ class AssetList: self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") # We match the "UPRN" which is the landlords ID, onto the master sheet + + if measure_mix_col not in master_data.columns: + master_data[measure_mix_col] = "Measure mix not recorded" + matched = pd.DataFrame(matched) - master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge( + if matched.empty: + continue + + master_to_append = master_data[ + [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] + ].merge( matched, how="left", on="row_id" ).rename( columns={ scheme_col: "funding_scheme", measure_mix_col: "measure_mix", install_col: "survey_status", - submission_col: "submission_date" + submission_col: "submission_date", + installer_notes_col: "submission_installer_notes", + installer_col: "submission_installer" } ) - master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") + master_to_append["submission_cancelled"] = ( + master_to_append["survey_status"].str.lower().str.contains("cancel") + ) + 
master_to_append["submission_installed"] = ( + master_to_append["survey_status"].str.lower().str.contains("installed") + ) master_surveyed.append(master_to_append) unmatched_df = master_data[ master_data["row_id"].isin(unmatched) @@ -2828,7 +3556,21 @@ class AssetList: ].astype(str) # We de-dupe crudely on landlord property id - self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy() + + # We now add the submission status, based on the hubspot stages + self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_cancelled"] == True, + hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label, + self.master_surveyed["submission_status"] + ) + + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_installed"] == True, + hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label, + self.master_surveyed["submission_status"] + ) self.standardised_asset_list = self.standardised_asset_list.merge( self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID diff --git a/asset_list/abs_estimates.py b/asset_list/abs_estimates.py new file mode 100644 index 00000000..58adcca6 --- /dev/null +++ b/asset_list/abs_estimates.py @@ -0,0 +1,229 @@ +""" +Simple script to take a standardised asset list and calculate the abs. 
We'll use this code to estimate +the ABS for properties, going forward +""" +import os +import pandas as pd +import numpy as np +from dotenv import load_dotenv +from etl.find_my_epc.AssetListEpcData import AssetListEpcData +from backend.Funding import Funding +from backend.app.utils import sap_to_epc + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx", + sheet_name="Cavity Route (Insta Review)" +) + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) +pps_matrix = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx", + header=1 +) +pps_matrix.columns = [c.strip() for c in pps_matrix.columns] + +# We need to estimate the number of points the work will produce and the finishing band. For this, we assume 7 for +# cavity and 15 for solar. We'll be more specific in the future, but for now, this is a good enough estimate. 
+route = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "upr"} +) +route["address"] = route["address"].astype(str) + +asset_list_epc_client = AssetListEpcData( + asset_list=route, + epc_auth_token=EPC_AUTH_TOKEN +) + +asset_list_epc_client.get_data() +asset_list_epc_client.get_non_invasive_recommendations() + +solar_sap_points = [] +for r in asset_list_epc_client.non_invasive_recommendations: + if not r.get("recommendations"): + continue + solar_recommendations = [ + x for x in r["recommendations"] if "solar_pv" in x["type"] + ] + if solar_recommendations: + solar_recommendations = solar_recommendations[0] + else: + continue + + address = r["address"] + postcode = r["postcode"] + + solar_sap_points.append( + { + "address": address, + "postcode": postcode, + "sap_points": solar_recommendations["sap_points"] + } + ) + +solar_sap_points = pd.DataFrame(solar_sap_points) +solar_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True) +# Store the sap points in the cavity route to csv +# cwi_sap_points.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv", +# index=False +# ) + +avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index() +avg_solar_points = solar_sap_points["sap_points"].median() +asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str) +asset_list = asset_list.merge( + solar_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"] +).drop( + columns=["address", "postcode"] +) + +# Fill the sap points with the average cwi points +asset_list = asset_list.merge( + avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"}), + how="left", on=["domna_postcode"], suffixes=("", "_avg") +) +asset_list["sap_points"] = 
asset_list["sap_points"].fillna(asset_list["sap_points_avg"]) +asset_list.drop(columns=["sap_points_avg"], inplace=True) + +asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_solar_points) +asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"] +asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x)) +asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x)) + +asset_list["ending_half_band"] = np.where( + (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]), + "Low_C", + asset_list["ending_half_band"] +) +# Realistically, we'll take the properties to a low C at worst +asset_list["ending_half_band"] = np.where( + (asset_list["post_works_sap"] < 69), + "Low_C", + asset_list["ending_half_band"] +) + +asset_list = asset_list.merge( + abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"], + right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ] +) +asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment']) + +asset_list = asset_list.rename( + columns={"Cost Savings": "funding_abs"} +) + +print(asset_list["domna_property_id"].duplicated().sum()) + +# Store this data +asset_list.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv", + index=False +) + +# Cavity process! 
+# cwi_sap_points = [] +# for r in asset_list_epc_client.non_invasive_recommendations: +# if not r.get("recommendations"): +# continue +# cwi_recommendations = [ +# x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"] +# ] +# if cwi_recommendations: +# cwi_recommendations = cwi_recommendations[0] +# else: +# continue +# +# address = r["address"] +# postcode = r["postcode"] +# +# cwi_sap_points.append( +# { +# "address": address, +# "postcode": postcode, +# "sap_points": cwi_recommendations["sap_points"] +# } +# ) +# +# cwi_sap_points = pd.DataFrame(cwi_sap_points) +# cwi_sap_points = pd.read_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv" +# ) +# cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True) +avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index() +avg_cwi_points = cwi_sap_points["sap_points"].median() +asset_list = asset_list.merge( + cwi_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"] +).drop( + columns=["address", "postcode"] +) + +# Fill the sap points with the average cwi points +asset_list = asset_list.merge( + avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}), + how="left", on=["domna_postcode"], suffixes=("", "_avg") +) +asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"]) +asset_list.drop(columns=["sap_points_avg"], inplace=True) + +asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_cwi_points) +asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"] +asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x)) +asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: 
Funding.get_sap_band(x)) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x)) + +asset_list["funding_scheme"] = np.where( + ( + (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]) + ), + "GBIS", + "ECO4" +) +asset_list = asset_list.merge( + abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"], + right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ] +) +asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment']) + +# Using CWI solid 1.7 -> 0.3 rates +cwi_pps_matrix = pps_matrix[ + pps_matrix["Measure_Type"].isin(["CWI_0.033"]) +] +# Merge on +asset_list = asset_list.merge( + cwi_pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename( + columns={ + "Cost Savings": "partial_project_score", + "Starting Band": "starting_half_band", + "Total Floor Area Band": "floor_area_band" + } + ), + how="left", + on=["starting_half_band", "floor_area_band"], +) +asset_list["partial_project_score"] = np.where( + (asset_list["epc_sap_score_on_register"] > 69), + None, + asset_list["partial_project_score"] +) + +asset_list["funding_abs"] = np.where( + asset_list["funding_scheme"] == "GBIS", + asset_list["partial_project_score"], + asset_list["Cost Savings"] +) + +asset_list["domna_property_id"].duplicated().sum() + +# Store this data +asset_list.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv", + index=False +) diff --git a/asset_list/app.py b/asset_list/app.py index bb898c09..e431f723 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -1,9 +1,6 @@ import os import json import pandas as pd -from pprint import pprint -import msgpack -from utils.s3 import read_from_s3 from asset_list.AssetList import AssetList from asset_list.mappings.property_type import PROPERTY_MAPPING from asset_list.mappings.built_form import 
BUILT_FORM_MAPPINGS @@ -62,98 +59,558 @@ def app(): Property UPRN """ - # Thurrock - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock" - data_filename = "THURROCK COUNCIL - For analysis.xlsx" - sheet_name = "Assets" + # CDS + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS" + data_filename = "Founder Estates - Asset List.xlsx" + sheet_name = "Combined" postcode_column = 'Postcode' - fulladdress_column = "Full Address" + fulladdress_column = "Address" address1_column = None address1_method = "house_number_extraction" address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Construction Date" + landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Subtype" + landlord_property_type = None + landlord_built_form = None landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Main Heating Type" - landlord_existing_pv = None - landlord_property_id = "Property Reference" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - - # Medway - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" - data_filename = "MEDWAY Asset List.xlsx" - sheet_name = "Asset list" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "House Number" - address1_method = None - address_cols_to_concat = ["House Number", "Street 1"] - missing_postcodes_method = None - landlord_year_built = "Year Built" - landlord_os_uprn = None - landlord_property_type = "Property Type - Academy" - landlord_built_form = "Property Type - Academy" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None + 
landlord_heating_system = "Heating Type" landlord_existing_pv = None landlord_property_id = "Row ID" - landlord_sap = None outcomes_filename = [] outcomes_sheetname = [] outcomes_postcode = [] outcomes_houseno = [] - outcomes_id = [] outcomes_address = [] + outcomes_id = [] master_filepaths = [] master_to_asset_list_filepath = None + asset_list_header = 0 + landlord_block_reference = None + master_id_colnames = [] + landlord_roof_construction = None phase = False + landlord_sap = None ecosurv_landlords = None - # MHS - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS" - data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx" + # Plus Dane + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/" + data_filename = "20250711 Plus Dane Asset List.xlsx" sheet_name = "Sheet1" postcode_column = 'Postcode' - fulladdress_column = "FullAddress" + fulladdress_column = "Address" address1_column = None address1_method = "house_number_extraction" address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "BuiltInYear" + landlord_year_built = "Property Age" landlord_os_uprn = None - landlord_property_type = "AssetType" - landlord_built_form = "PropertyType" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None + landlord_property_type = "Property Type" + landlord_built_form = "Built Form" + landlord_wall_construction = "Wall Construction" + landlord_heating_system = "Full Heating System" landlord_existing_pv = None landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] + outcomes_filename = [ + os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"), + os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"), + os.path.join(data_folder, 
"Outcomes - Plus Dane_PV_2025.xlsx"), + ] + outcomes_sheetname = [ + "CWI & LI - 2024", "2025 - CWI", "PV - 2025", + ] + outcomes_postcode = ["Postcode", "Postcode", "Postcode"] + outcomes_houseno = ["No.", "No", "No"] + outcomes_address = ["Address", "Address", "Address"] + outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"] + master_filepaths = [ + os.path.join(data_folder, "submissions/JJC-Table 1.csv"), + os.path.join(data_folder, "submissions/SCIS-Table 1.csv") + ] master_to_asset_list_filepath = None + asset_list_header = 1 + landlord_block_reference = None + master_id_colnames = [None, None] + landlord_roof_construction = None phase = False - ecosurv_landlords = None + landlord_sap = "SAP Rating" + ecosurv_landlords = "plus dane" + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" + # data_filename = "20250710 Asset List Brentwood.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "House Number" + # address1_method = None + # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] + # missing_postcodes_method = None + # landlord_year_built = "Year Built" + # landlord_os_uprn = None + # landlord_property_type = "Dwelling" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating" + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] + # outcomes_sheetname = ["OUTCOMES"] + # outcomes_postcode = ["POSTCODE"] + # outcomes_houseno = [None] + # outcomes_address = ["ADDRESS"] + # outcomes_id = [None] + # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] + # master_to_asset_list_filepath = None + # asset_list_header = 1 + # landlord_block_reference = None + # master_id_colnames = [None] + # landlord_roof_construction = None + # 
phase = False + # landlord_sap = None + # ecosurv_landlords = "brentwood" + + # Brentwood + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" + # data_filename = "20250710 Asset List Brentwood.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "House Number" + # address1_method = None + # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] + # missing_postcodes_method = None + # landlord_year_built = "Year Built" + # landlord_os_uprn = None + # landlord_property_type = "Dwelling" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating" + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] + # outcomes_sheetname = ["OUTCOMES"] + # outcomes_postcode = ["POSTCODE"] + # outcomes_houseno = [None] + # outcomes_address = ["ADDRESS"] + # outcomes_id = [None] + # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] + # master_to_asset_list_filepath = None + # asset_list_header = 1 + # landlord_block_reference = None + # master_id_colnames = [None] + # landlord_roof_construction = None + # phase = False + # landlord_sap = None + # ecosurv_landlords = "brentwood" + # + # # Eastlight + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme" + # data_filename = "INSPECTIONS MASTER Non Tech.xlsx" + # sheet_name = "EASTLIGHT CW" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "HouseName" + # address1_method = None + # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"] + # missing_postcodes_method = None + # landlord_year_built = "Built In Year" + # landlord_os_uprn = None + # landlord_property_type = "AssetType" + # landlord_built_form = 
"Archetype" # Using inspections archetype + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = "Main Heating Source" + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # landlord_sap = "SAP Score" + # outcomes_filename = [ + # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"), + # os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"), + # ] + # outcomes_sheetname = ["Outcomes", "Feedback"] + # outcomes_postcode = ["Postcode", "Postcode"] + # outcomes_houseno = ["No", "No."] + # outcomes_id = [None, None] + # outcomes_address = ["Address", "Address"] + # master_filepaths = [ + # os.path.join(data_folder, "ECO 3-Table 1.csv"), + # os.path.join(data_folder, "ECO 4-Table 1.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "eastlight" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None] + # landlord_sap = None + + # Pickering and Ferens + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" + # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" + # sheet_name = "Sava Intelligent Energy - Prope" + # postcode_column = 'Postcode' + # fulladdress_column = 'Address' + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = "Property Type" # Using the inspections property type + # landlord_built_form = "Archetype 2" + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # landlord_sap = "SAP Rating (RdSAP 10)" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + 
# outcomes_address = [] + # master_filepaths = [ + # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), + # os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "pickering" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None] + + # Colchester + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" + # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Full Address.1' + # fulladdress_column = "Full Address" + # address1_column = None + # address1_method = "first_word" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Property Type" + # landlord_wall_construction = "Wallinsul" + # landlord_heating_system = "HeatSorc" + # landlord_existing_pv = None + # landlord_property_id = "Property Reference" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # asset_list_header = 0 + # landlord_built_form = None + # landlord_roof_construction = None + # landlord_sap = None + # landlord_block_reference = None + # phase = False + # ecosurv_landlords = None + # master_id_colnames = [] + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" + # data_filename = "EalingFlats.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built 
= None + # landlord_os_uprn = None + # landlord_property_type = None # Using the inspections property type + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "Property ref" + # landlord_sap = None + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = "Block Ref" + # master_id_colnames = [] + + # Southern - Jan list + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" + # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" + # sheet_name = "Jan 2025 additions" + # postcode_column = 'Post Code' + # fulladdress_column = None + # address1_column = "NO." 
+ # address1_method = None + # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None # Using the inspections property type + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "SH Property Reference" + # landlord_sap = None + # outcomes_filename = [ + # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), + # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), + # ] + # outcomes_sheetname = ["Feedback", "Collated"] + # outcomes_postcode = ["Poscode", "Postcode"] + # outcomes_houseno = ["No.", "No"] + # outcomes_id = ["UPRNs", None] + # outcomes_address = ["Address", "Address"] + # master_filepaths = [ + # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "southern" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None, None, None] + + # NCHA + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" + # data_filename = "Energy Information MASTER June 2025.xlsx" + # sheet_name = "Data" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Date (HAR10)" + # landlord_os_uprn = None + # landlord_property_type = "Property Type (HAR10)" + # 
landlord_built_form = "Build Form (EPC)" + # landlord_wall_construction = "Wall Description" + # landlord_roof_construction = None + # landlord_heating_system = "HEAT Code" + # landlord_existing_pv = None + # landlord_property_id = "Place ref" + # landlord_sap = "EPC SAP" + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None + # master_filepaths = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [] + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" + # data_filename = "07.04 CALICO - Final List.xlsx" + # asset_list_header = 2 + # sheet_name = "Final List" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "Property Number / Name" + # address1_method = None + # address_cols_to_concat = [ + # "Property Number / Name", + # "Street", + # "Town" + # ] + # missing_postcodes_method = None + # landlord_year_built = "NROSH Estimated Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Asset Type" + # landlord_built_form = None + # landlord_wall_construction = "Wall Type" + # landlord_heating_system = "Boiler Type" + # landlord_existing_pv = None + # landlord_property_id = "Asset Reference" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = "Current Efficiency Rating - Score" + # phase = None + # ecosurv_landlords = None + + # data_folder = ( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset + # List" + # ) + 
# data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" + # sheet_name = "Assets" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Year" + # landlord_os_uprn = None + # landlord_property_type = "Property Archetype" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating Fuel Type" + # landlord_existing_pv = None + # landlord_property_id = "Uprn - DO NOT DELETE" + # outcomes_filename = [ + # os.path.join(data_folder, "RT - LiveWest.xlsx") + # ] + # outcomes_sheetname = ["Feedback"] + # outcomes_postcode = ["Poscode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["UPRN"] + # outcomes_address = ["Address"] + # master_filepaths = [ + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling + # Master " + # "- redacted for analysis/CAVITY-Table 1.csv" + # ] + # master_id_colnames = [None] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = None + # phase = None + # ecosurv_landlords = "livewest|live west" + + # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " + # "2025/Livewest Asset List (Original) - csv") + # data_filename = "Report-Table 1.csv" + # sheet_name = None + # postcode_column = 'Postcode' + # fulladdress_column = "T1_Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Yr" + # landlord_os_uprn = None + # landlord_property_type = "T1_AssetType" + # landlord_built_form = "T1_AssetType" + # landlord_wall_construction = "Wall Type Cavity" + # landlord_heating_system = "Heating Fuel" + # 
landlord_existing_pv = None + # landlord_property_id = "T1_UPRN" + # outcomes_filename = [ + # os.path.join(data_folder, "RT - LiveWest.xlsx") + # ] + # outcomes_address = ["Address"] + # outcomes_sheetname = ["Feedback"] + # outcomes_postcode = ["Poscode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["UPRN"] + # master_filepaths = [ + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling + # Master " + # "- redacted for analysis/CAVITY-Table 1.csv" + # ] + # master_id_colnames = [None] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = None + # phase = None + # ecosurv_landlords = "livewest|live west" + + # Stori + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" + # data_filename = "Asset list - for analysis.xlsx" + # sheet_name = "SAP and Costs Calculations" + # postcode_column = 'Postcode' + # fulladdress_column = "Address1" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Age" + # landlord_os_uprn = None + # landlord_property_type = "TYPE" + # landlord_built_form = "AGE / DETACHMENT" + # landlord_wall_construction = "WALL" + # landlord_roof_construction = "LOFT INSULATION" + # landlord_heating_system = "BOILER" + # landlord_existing_pv = "SOLAR PV" + # landlord_property_id = "UPRN" + # landlord_sap = "Current SAP Rating" + # landlord_block_reference = None + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # master_id_colnames = [] + # phase = False + # ecosurv_landlords = None + + # Thrive - reconciliation + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme 
Reconciliation" + # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'postcode' + # fulladdress_column = "full_address" + # address1_column = "address_line_1" + # address1_method = None + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "age_band_calculated" + # landlord_os_uprn = None + # landlord_property_type = "property_type" + # landlord_built_form = "build_form" + # landlord_wall_construction = None + # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" + # landlord_heating_system = "heating_type_updated" + # landlord_existing_pv = None + # landlord_property_id = "thrive_property_id" + # landlord_sap = "sap_rating_updated" + # landlord_block_reference = "block_reference" + # outcomes_filename = [ + # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") + # ] + # outcomes_sheetname = ["Sheet1"] + # outcomes_postcode = ["postcode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["thrive_property_id"] + # outcomes_address = ["address"] + # master_filepaths = [ + # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), + # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), + # ] + # master_to_asset_list_filepath = None + # master_id_colnames = ["thrive_property_id", "thrive_property_id"] + # phase = False + # ecosurv_landlords = "thrive" # Southern Midlands # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" @@ -182,40 +639,12 @@ def app(): # master_filepaths = [] # master_to_asset_list_filepath = None - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West" - data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx" - sheet_name = "CHECKED" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - 
address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - landlord_sap = None - phase = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} asset_list = AssetList( local_filepath=os.path.join(data_folder, data_filename), - header=0, + header=asset_list_header, sheet_name=sheet_name, address1_colname=address1_column, postcode_colname=postcode_column, @@ -233,6 +662,7 @@ def app(): landlord_heating_system=landlord_heating_system, landlord_existing_pv=landlord_existing_pv, landlord_sap=landlord_sap, + landlord_block_reference=landlord_block_reference, phase=phase ) asset_list.init_standardise() @@ -294,7 +724,8 @@ def app(): asset_list.flag_survey_master( master_filepaths=master_filepaths, - master_to_asset_list_filepath=master_to_asset_list_filepath + master_to_asset_list_filepath=master_to_asset_list_filepath, + master_id_colnames=master_id_colnames, ) asset_list.flag_ecosurv(ecosurv_landlords) @@ -306,7 +737,7 @@ def app(): epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks - chunk_size = 5000 + chunk_size = 2000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): @@ -486,59 +917,13 @@ def app(): ) asset_list.merge_data(epc_df) - asset_list.extract_attributes() + asset_list.identify_worktypes() - cleaned = read_from_s3( - 
s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" - ) - cleaned = msgpack.unpackb(cleaned, raw=False) - - asset_list.identify_worktypes(cleaned) - - pprint(asset_list.work_type_figures) - - asset_list.flat_analysis() - - asset_list.load_contact_details( - local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), - sheet_name="Report 1", - landlord_property_id=asset_list.landlord_property_id, - phone_number_column='Property Current Tel. Number', - fullname_column='Proeprty Current Occupant', - firstname_column=None, - lastname_column=None, - email_column=None, # TODO - we need this - ) - - # Convert to a format suitable for CRM - # TODO: TEMP - assigned_surveyors = pd.DataFrame( - [ - { - asset_list.landlord_property_id: "02610001", - "week_commencing": "10/10/2025", - "surveyor_name": "Khalim Conn-Kowlessar", - "surveyor_email": "khalim@domna.homes", - } - ] - ) - - # TODO: Sort the output by postcode - - company_domain = "ealing.gov.uk" - crm_pipeline_name = "Survey Management" - first_dealstage = "READY TO BEGIN SCHEDULING" - # TODO - temp, upload to either SharePoint or AWS - - asset_list.prepare_for_crm( - assigned_surveyors=assigned_surveyors, - company_domain=company_domain, - crm_pipeline_name=crm_pipeline_name, - first_dealstage=first_dealstage - ) - hubspot_data = asset_list.hubspot_data + # We now flag the status of the property + asset_list.label_property_status() + asset_list.analyse_geographies() + asset_list.get_work_figures() # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" @@ -546,7 +931,8 @@ def app(): with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) - asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False) + if asset_list.block_analysis_df is not None: + 
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) # If we have outcomes, we add a tab with the outcomes if not asset_list.outcomes_for_output.empty: asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) @@ -560,5 +946,5 @@ def app(): if not asset_list.ecosurv_no_match.empty: asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) - # Store the Hubspot export as a csv - hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False) + if not asset_list.geographical_areas.empty: + asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py new file mode 100644 index 00000000..23ff900a --- /dev/null +++ b/asset_list/hubspot/config.py @@ -0,0 +1,85 @@ +from enum import IntEnum, Enum + +CRM_PIPELINE_NAME = 'Operations - Housing Associations' + + +class HubspotProcessStatus(IntEnum): + def __new__(cls, value, label): + obj = int.__new__(cls, value) + obj._value_ = value + obj.label = label + return obj + + # the numerical values of this enum aren't important, but they define the order of operations + + # This is the first stage, where a survey is ready to go + READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED" + # The property didn't get access and needs sign off + SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF" + # The survey has been completed. We don't have any update as to whether the property has been installed + SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF" + # The property turned out to be ineligibile + NOT_VIABLE = 4, "NOT VIABLE" + # The property is with the installer. 
This will likely be the default for historic programmes + SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER" + # The property has been installed + INSTALL_COMPLETE = 6, "INSTALL COMPLETE" + # The install has complete and lodgement is complete + LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE" + # The property has been cancelled + INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED" + + +class Installer(Enum): + SCIS = "SCIS" + JJ_CRUMP = "J & J CRUMP" + SGEC = "SGEC" + + @classmethod + def is_valid_value(cls, value): + """ + Check if the value is a valid installer. + """ + return value in cls._value2member_map_ + + +CRM_UPLOAD_COLUMNS = [ + 'Name ', 'Associations: Listing', 'Company Domain Name ', + 'Email ', 'First Name ', 'Last Name ', + 'Phone ', 'Secondary Phone ', + 'Secondary Contact Full Name ', + 'Listing Owner Email ', + 'Full Address ', 'Address 1 ', + 'Address 2 ', 'Postcode ', + 'Property Type ', 'Property Sub Type ', + 'Bedroom(s) ', 'Domna Property ID ', + 'National UPRN ', 'Owner Property ID ', + 'Wall Construction ', 'Heating System ', + 'Year Built ', 'Boiler Make ', + 'Boiler Model ', + 'Non-Intrusives: Date Checked ', + 'Non-Intrusives: Wall Type ', + 'Non-intrusives: Insulation ', + 'Non-intrusives: Insulation Material ', + 'Non-Intrusives: CIGA Check Required ', + 'Non-Intrusives: PV Access Issues ', + 'Non-Intrusives: Roof Orientation ', + 'Non-Intrusives: Surveyor Notes ', + 'Non-Intrusives: Surveyor Name ', + 'CIGA: Date Requested ', + 'CIGA: Cavity Guarantee Found ', + 'Last EPC: Is Estimated ', + 'Last EPC: EPC Rating ', + 'Last EPC: SAP Rating ', + 'Last EPC: Main Heating Description ', + 'Last EPC: Heating Controls ', + 'Last EPC: Lodgement Date ', + 'Last EPC: Floor Area ', 'Last EPC: Wall ', + 'Last EPC: Roof ', 'Last EPC: Floor ', + 'Last EPC: Room Height ', + 'Last EPC: Age Band ', 'Deal Stage ', + 'Pipeline ', 'Expected Commencement Date ', + 'Deal Name ', 'Project Code ', 'Postcode ', + 'Product ID ', 'Name ', 'Unit 
price ', + 'Quantity ', 'Deal Owner', 'Amount ', 'Installer ' +] diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py new file mode 100644 index 00000000..b12f4c04 --- /dev/null +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -0,0 +1,177 @@ +import os +import pandas as pd +from asset_list.AssetList import AssetList + +import re + + +def normalize_uk_phone(number: str | float | int) -> str | None: + """Return number in +44 international form, or None if it is missing or does not match the UK pattern checked below.""" + if pd.isna(number): + return None + + number = str(number) + number = re.sub(r"[^\d+]", "", number) + + # Handle common short inputs: add '0' if likely missing + if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number): + number = "0" + number + + # Convert to international format; "0044" must be tested before the bare "0" prefix, which it also matches + if number.startswith("0044"): + number = "+" + number[2:] + elif number.startswith("0"): + number = "+44" + number[1:] + + # Must be +44 followed by 10 digits (some area codes may vary) + if re.match(r"^\+44\d{9,10}$", number): + return number + + return None + + +def app(): + """ + TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after + review. So, we will need to update the hubspot status for these entries and set them to None, if they + were previously being set to ready for scheduling.
We don't want to just filter on rows where + cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove + them + + + TODO: If we wish to upload deals in batches + + :return: + """ + + # inputs: + reconcile_programme = True # If True, the hubspot upload will include all properties with a project code + customer_domain = "https://ealing.gov.uk" + installer_name = "SCIS" + asset_list_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared " + "programme.xlsx" + ) + asset_list_sheet_name = "Standardised Asset List" + asset_list_header = 0 + + contact_details_filepath = None + contacts_sheet_name = "Sheet 1" + contacts_landlord_property_id = "UPRN" + contacts_phone_number_column = "phone_number" + contacts_secondary_phone_number_column = "secondary_phone_number" + contacts_secondary_contact_full_name = "secondary_contact_full_name" + contacts_email_column = "email" + contacts_fullname_column = "fullname" + contacts_firstname_column = "First Name" + contacts_lastname_column = "Last Name" + + existing_programme_filepath = None + + asset_list = AssetList.load_standardised_asset_list( + asset_list_filepath, asset_list_sheet_name, asset_list_header + ) + asset_list.load_contact_details( + local_filepath=contact_details_filepath, + sheet_name=contacts_sheet_name, + landlord_property_id=contacts_landlord_property_id, + phone_number_column=contacts_phone_number_column, + secondary_phone_number_column=contacts_secondary_phone_number_column, + secondary_contact_full_name=contacts_secondary_contact_full_name, + email_column=contacts_email_column, + fullname_column=contacts_fullname_column, + firstname_column=contacts_firstname_column, + lastname_column=contacts_lastname_column + ) + + asset_list.prepare_for_crm( + company_domain=customer_domain, + installer_name=installer_name, + reconcile_programme=reconcile_programme + ) + + # Remove the existing programme + # 
existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") + # asset_list.hubspot_data = asset_list.hubspot_data[ + # ~asset_list.hubspot_data["Domna Property ID "].isin( + # existing_programme['Domna Property ID'].values + # ) + # ] + + # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv + directory, filename = os.path.split(asset_list_filepath) + name, ext = os.path.splitext(filename) + output_filename = f"{name} - Hubspot Upload.csv" + output_filepath = os.path.join(directory, output_filename) + + if pd.isnull(asset_list.hubspot_data['Project Code ']).any(): + raise ValueError("Some rows have a missing 'Project Code '. These cannot be uploaded to HubSpot.") + + if pd.isnull(asset_list.hubspot_data['Deal Stage ']).any(): + raise ValueError("Some rows have a missing 'Deal Stage '. These cannot be uploaded to HubSpot.") + + # Just store locally + asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig") + + # # TODO: Set this up separately, but we associate multiple contacts to the same deal + # contact_details = pd.read_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot " + # "Upload/Hubspot/contact " + # "details.csv" + # ) + # + # # contacts_phone_number_column = "phone_number" + # # contacts_secondary_phone_number_column = "secondary_phone_number" + # # contacts_secondary_contact_full_name = "secondary_contact_full_name" + # # contacts_email_column = "email" + # # contacts_fullname_column = "fullname" + # # contacts_firstname_column = "First Name" + # # contacts_lastname_column = "Last Name" + # contact_details["phone_number"] = contact_details["Mobile Phone"].copy() + # # If phone number is NaN, we will use the landline number + # contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"]) + # contact_details["secondary_phone_number"] = contact_details["Landline"].copy() + # # If secondary phone number is the same as
primary, we remove it + # import numpy as np + # contact_details["secondary_phone_number"] = np.where( + # contact_details["secondary_phone_number"] == contact_details["phone_number"], + # np.nan, + # contact_details["secondary_phone_number"] + # ) + # contact_details = contact_details[ + # ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number", + # "secondary_phone_number", "First Name", "Last Name"]].copy().rename( + # columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"} + # ) + # contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"] + # # Format the phone numbers + # + # contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply( + # normalize_uk_phone) + # contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype( + # str).apply( + # normalize_uk_phone) + # + # # Add in the Hubspot deal data + # hubspot_data = pd.read_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/" + # "property-status.csv", + # encoding="utf-8-sig" + # ) + # # Merge on contact details + # contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge( + # contact_details, + # how="left", + # right_on="landlord_proprty_id", + # left_on="Landlord Property ID" + # ) + # + # contact_details = contact_details.drop(columns=["landlord_proprty_id"]) + # + # # Store as csv + # contact_details.to_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar " + # "Programme Hubspot Upload/Hubspot/" + # "contact_details.csv", + # index=False, encoding="utf-8-sig" + # ) diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 116c3203..c17e0ed4 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -3,7 +3,7 @@ import numpy as np STANDARD_BUILT_FORMS 
= { "unknown", # Houses - "end-terrace", "semi-detached", "detached", "mid-terrace", + "end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace", # Flats "ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise", } @@ -331,4 +331,46 @@ BUILT_FORM_MAPPINGS = { 'Low Rise': 'low rise', 'Upper Floor': 'top-floor', 'High Rise': 'high rise', + + '2012 ONWARDS DETACHED': 'detached', + '1950-66 END TERRACE': 'end-terrace', + '1976-82 MID TERRACED': 'mid-terrace', + '1950-66 MID TERRACE': 'mid-terrace', + '1991-95 DETACHED': 'detached', + '1976-82 END TERRACED': 'end-terrace', + '1967-75 DETACHED': 'detached', + 'PRE 1900 DETACHED': 'detached', + 'PRE 1900 MID TERRACE': 'mid-terrace', + '1900 DET': 'detached', + '1967-75 MID TERR': 'mid-terrace', + '1930-49 SEMI DET': 'semi-detached', + '1900-29 SEMI DET': 'semi-detached', + '1900-29 MID TERR': 'mid-terrace', + '1983- 90 MID TERR': 'mid-terrace', + '1976-82 MID TERR': 'mid-terrace', + '1983-90 END TERR': 'end-terrace', + '1991-95 SEMI DET': 'semi-detached', + '1983-90 SEMI DET': 'semi-detached', + '1991-95 MID TERR': 'mid-terrace', + '1950-66 SEMI DET': 'semi-detached', + '1900 MID TERR': 'mid-terrace', + '1967-75 SEMI DET': 'semi-detached', + '1983- 90 SEMI DET': 'semi-detached', + '1983-90 MID TERR': 'mid-terrace', + '1976-82 SEMI DET': 'semi-detached', + 'PRE 1900 MID TERR': 'mid-terrace', + None: 'unknown', + + 'SEMI-DETACHED': 'semi-detached', + 'DETACHED': 'detached', + 'MID TERRACE': 'mid-terrace', + 'END TERRACE': 'end-terrace', + 'ENCLOSED MID': 'enclosed mid-terrace', + + 'BUILDING': 'unknown', + 'FLAT COMMUNAL FACILITIES': 'unknown', + 'MAISONETTE': 'unknown', + 'HOUSE': 'unknown', + 'FLAT': 'unknown', + 'BLOCK': 'unknown' } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index 51f5f922..e67fafb4 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -16,5 +16,6 @@ 
EXISTING_PV_MAPPINGS = { 'PV: 25% roof area, PV: 3.6kWp array': 'already has PV', 'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 50% roof area': 'already has PV', - 'Solar PV': 'already has PV' + 'Solar PV': 'already has PV', + 'SOLAR PV': 'already has PV' } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 92f59f2c..010d49a5 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -27,7 +27,8 @@ STANDARD_HEATING_SYSTEMS = { "electric ceiling", "electric underfloor", "no heating", - "non-electric underfloor" + "non-electric underfloor", + "warm air heating", } HEATING_MAPPINGS = { @@ -292,4 +293,76 @@ HEATING_MAPPINGS = { 'Communal Heating': 'communal heating', 'No Data': 'unknown', 'Boiler System': 'gas condensing boiler', + 'Storage heating': 'electric storage heaters', + 'Storage heating (HHRSH)': 'high heat retention storage heaters', + + 'ELECTRIC BOILER': 'electric boiler', + 'STORAGE HEATERS': 'electric storage heaters', + 'GREENSTAR 24I JUNIOR': 'gas combi boiler', + 'generic cond combi post98': 'gas condensing combi', + 'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler', + 'ECO TEC PRO 28 H COMBI A': 'gas combi boiler', + 'GREENSTAR 25I ErP': 'gas combi boiler', + 'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler', + 'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler', + 'IDEAL LOGIC HEAT 30': 'gas boiler, radiators', + 'WORCESTER 240': 'gas boiler, radiators', + 'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler', + 'ECO TEC PRO 28 (OLD)': 'gas combi boiler', + 'LOGIC COMBI2 C30': 'gas combi boiler', + 'GREENSTAR 28I JUNIOR': 'gas combi boiler', + 'WORCESTER 24i': 'gas combi boiler', + 'GREENSTAR 30I ErP': 'gas combi boiler', + '25 CDI': 'gas combi boiler', + 'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler', + 'GREENSTAR 24 RI': 'gas boiler, radiators', + 'BAXI COMBI 105 HE': 'gas combi boiler', + 'ECO TEC PRO 28 (OLD TYPE)': 'gas combi 
boiler', + 'WORCESTER 28 SI ll RSF': 'gas combi boiler', + 'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler', + 'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler', + 'WORCESTER 24 SI ll RSF': 'gas combi boiler', + 'GREENSTAR 4000': 'gas combi boiler', + 'GREENSTAR 24i JUNIOR': 'gas combi boiler', + 'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler', + 'GREENSTAR 30SI COMPACT': 'gas combi boiler', + 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler', + 'Not applicable for this asset type': 'unknown', + 'Boiler: F rated Regular Boiler': 'gas condensing boiler', + 'Warm Air Systems: Electric warm air: Electricaire system': 'warm air heating', + 'Boiler: B rated Combi': 'gas condensing combi', + 'Boiler: G rated Regular Boiler': 'gas condensing boiler', + 'Electric Storage Systems: Modern (slimline) storage heaters': 'electric storage heaters', + 'Boiler: C rated CPSU': 'gas condensing combi', + 'Boiler: D rated Regular Boiler': 'gas condensing boiler', + 'Warm Air Systems: Gas fired warm air with balanced or open flue: Ducted or stub-ducted, on-off control, ' + 'pre 1998': 'warm air heating', + 'Electric Storage Systems: Integrated storage+direct-acting heater': 'electric storage heaters', + 'Boiler: D rated Combi': 'gas condensing combi', + 'Heat Pump: (from database)': 'air source heat pump', + 'Community Heating Systems: Community CHP and boilers (RdSAP)': 'communal heating', + '': 'unknown', + + 'Solid Fuel Boiler': 'solid fuel', + 'Heating (Other)': 'other', + 'Solid Fuel Fire Only': 'solid fuel', + 'No Main Heat Source': 'no heating', + 'Electric Programmable': 'electric storage heaters', + 'Linked to Communal Boiler': 'communal heating', + 'Bio Mass Boiler': 'solid fuel', + 'Electric Non Programmable': 'electric storage heaters', + + 'Room heaters, Mains gas': 'room heaters', + 'Boiler, Solid fuel': 'solid fuel', + 'Room heaters, Electricity': 'room heaters', + 'Room heaters, Solid fuel': 'room heaters', + 'Boiler, Oil': 'oil boiler', + 'Boiler, 
Biomass': 'boiler - other fuel', + 'Community heating, Community (non-gas)': 'communal heating', + 'Heat pump (wet), Electricity': 'air source heat pump', + 'Community heating, Community (mains gas)': 'communal gas boiler', + 'Boiler, Electricity': 'electric boiler', + 'Boiler, LPG': 'gas boiler, radiators', + 'Boiler, Mains gas': 'gas boiler, radiators', + 'Storage heating, Electricity': 'electric storage heaters' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index b705d6ef..caca0cf0 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -252,5 +252,24 @@ PROPERTY_MAPPING = { 'Bedsit bungalow semi detached': 'bedsit', 'Bedsit Flat': 'bedsit', 'Semi detached house': 'house', - 'Unit': 'unknown' + 'Unit': 'unknown', + 'HOUSE (3 STOREY)': 'house', + 'FLAT GROUND FLOOR': 'flat', + 'FLAT TOP FLOOR': 'flat', + 'SHARED HOUSE': 'house', + 'MAISONETTE': 'maisonette', + 'DIRECT ACCESS HOSTEL': 'other', + 'Day centre': 'other', + 'Care home': 'other', + 'BLOCK (Communal)': 'block of flats', + 'SHOP': 'other', + 'Office Block': 'other', + 'BLOCK (Non-Communal)': 'block of flats', + 'Refuge': 'other', + None: 'unknown', + 'HFOP FLAT': 'flat', + 'HFOP BEDSIT': 'bedsit', + 'LINKED FLAT': 'flat', + 'LINKED BUNGALOW': 'bungalow' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 03d6f9af..66860bec 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -6,9 +6,10 @@ STANDARD_ROOF_CONSTRUCTIONS = { "pitched unknown access to loft", "piched unknown insulation", "pitched insulated", - "pitched less than 100mm insulation" + "pitched less than 100mm insulation", "another dwelling above", "flat unknown insulation", + "flat insulated", "unknown insulated", "unknown", } @@ -38,4 +39,140 @@ ROOF_CONSTRUCTION_MAPPINGS = { '200mm': 'pitched insulated', '0-49mm': 'pitched less than 100mm insulation', '50mm': 'pitched less than 100mm insulation', + '': 
'unknown', + 'NR': 'unknown', + 'Non-joist': 'unknown', + '25mm': 'pitched less than 100mm insulation', + '400mm+': 'pitched insulated', + '12mm': 'pitched less than 100mm insulation', + + '150MM': 'pitched insulated', + '200MM': 'pitched insulated', + '250MM': 'pitched insulated', + '100MM': 'pitched less than 100mm insulation', + 'U/K': 'unknown', + 'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation', + 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation', + + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 100mm': 'another dwelling above', + 'PitchedNormalNoLoftAccess: 150mm': 'pitched insulated', + 'PitchedNormalLoftAccess: As Built, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 200mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 200mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 50mm': 'unknown', + 'PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 150mm': 'unknown', 'Flat: None': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation', + 'PitchedNormalNoLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'Flat: Unknown, PitchedNormalLoftAccess: 200mm, SameDwellingAbove: Unknown': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation', + 'PitchedNormalNoLoftAccess: 250mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, 
PitchedNormalNoLoftAccess: 100mm': 'pitched insulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 300mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: 50mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalNoLoftAccess: 100mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 150mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 200mm': 'pitched less than 100mm insulation', + 'PitchedNormalNoLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, SameDwellingAbove': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 100mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalNoLoftAccess: None': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 200mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 300mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalNoLoftAccess: 150mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 200mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: None': 'pitched less than 100mm insulation', + 'Flat: As Built': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 250mm': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 50mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched 
' + 'insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated', + 'Flat: 50mm': 'flat unknown insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: None': 'another dwelling above', + 'PitchedNormalNoLoftAccess: None': 'pitched uninsulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 25mm': 'another dwelling above', + 'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above', + 'Flat: As Built, PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + 'Flat: Unknown, PitchedNormalLoftAccess: 75mm, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 300mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalLoftAccess: 150mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 300mm': 'unknown', 'Flat: 100mm': 'flat unknown insulation', + 'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 12mm': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 150mm': 'another dwelling above', + 'PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation', + + 'PitchedNormalLoftAccess: 25mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm 
insulation', + + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None, PitchedNormalNoLoftAccess: Unknown': 'pitched ' + 'insulated', + 'PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'Flat: As Built, PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + + 'PitchedNormalNoLoftAccess: Unknown, SameDwellingAbove: Unknown': 'pitched no access to loft', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: As Built': 'pitched less than 100mm insulation', + 'PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 50mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'Flat: 100mm, Flat: As Built': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: No Insulation': 'another dwelling above', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 300mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 270mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation', + 
'PitchedNormalLoftAccess: 300mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated', + 'Flat: As Built, PitchedNormalNoLoftAccess: 250mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalNoLoftAccess: 50mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 75mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched insulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 150mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above', + 'Flat: As Built, PitchedNormalNoLoftAccess: 200mm': 'flat unknown insulation', + 'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: No Insulation': + 'another dwelling above', + 'Flat: As Built, PitchedNormalLoftAccess: 50mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 25mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown, PitchedThatched: 25mm': 'pitched insulated', + 'Flat: 150mm+': 'flat insulated', + 'Flat: Unknown, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 
250mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 250mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 75mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 200mm': 'another dwelling above', + + 'PitchedNormalNoLoftAccess: Unknown': 'pitched no access to loft', + 'PitchedNormalLoftAccess: Unknown': 'pitched unknown insulation', + 'AnotherDwellingAbove: Unknown': 'another dwelling above' + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 5e32531f..245b7f88 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -7,122 +7,163 @@ STANDARD_WALL_CONSTRUCTIONS = { "uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation", # Timber Frame "timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame", - "system built", "granite or whinstone", "other", - "unknown", "sandstone or limestone", + # System + "system built unknown insulation", "insulated system built", "uninsulated system built", + # Granite or Whinstone + "granite or whinstone unknown insulation", "insulated granite or whinstone", "uninsulated granite or whinstone", + # Sandstone or Limestone + "sandstone or limestone unknown insulation", "insulated sandstone or limestone", + "uninsulated sandstone or limestone", + # Other + "other", "cob", "new build - average thermal transmittance", } WALL_CONSTRUCTION_MAPPINGS = { "New Build - Average Thermal Transmittance": "new build - average thermal transmittance", - 'Average thermal transmittance 0.25 W/m?K': 'unknown', + 'Average thermal transmittance 0.25 W/m?K': 'new build - average thermal transmittance', 'Cavity wall, as built, insulated (assumed)': 'filled cavity', 'Average thermal 
transmittance 0.31 W/m?K': 'unknown', 'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity', - 'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown', - 'Average thermal transmittance 0.16 W/m?K': 'unknown', - 'Average thermal transmittance 0.27 W/m²K': 'unknown', - 'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.18 W/m?K': 'unknown', - 'Granite or whin, with internal insulation': 'granite or whinstone', - "Granite or whinstone, as built, insulated (assumed)": "granite or whinstone", - 'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown', - 'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown', - 'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown', - 'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown', - 'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone', - 'Average thermal transmittance 0.33 W/m?K': 'unknown', + 'Average thermal transmittance 0.30 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.28 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.25 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.21 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.20 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.29 W/m?K': 'new build 
- average thermal transmittance', + 'Average thermal transmittance 0.16 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.27 W/m²K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.15 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.23 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.18 W/m?K': 'new build - average thermal transmittance', + 'Granite or whin, with internal insulation': 'insulated granite or whinstone', + "Granite or whinstone, as built, insulated (assumed)": "uninsulated granite or whinstone", + 'Average thermal transmittance 0.22 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.24 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.16 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.35 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.26 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.62 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.64 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.61 W/m?K': 'new build - average thermal transmittance', + 'Sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', + 'Average thermal transmittance 0.33 W/m?K': 'new build - average thermal transmittance', 'Cavity wall,': "cavity unknown insulation", 'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity', - 'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown', - 'Average thermal transmittance 0.22 W/m?K': 'unknown', 
'Average thermal transmittance 0.38 W/m?K': 'unknown', - 'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown', - 'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown', - 'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown', - 'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown', + 'Average thermal transmittance 0.29 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.32 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.19 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.27 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.22 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.38 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.26 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.27 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.18 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance = 0.27 W/m?K': 'new build - average thermal transmittance', + 'Cavity wall, with external insulation': 'filled cavity', + 'Average thermal transmittance 0.21 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.23 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.20 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.32 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 
0.24 W/m-¦K': 'new build - average thermal transmittance', 'Cavity wall, with internal insulation': 'filled cavity', - 'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown', + 'Average thermal transmittance 0.17 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.28 W/m?K': 'new build - average thermal transmittance', 'new build - average thermal transmittance': 'new build - average thermal transmittance', - 'average thermal transmittance 0.25 w/m?k': 'unknown', + 'average thermal transmittance 0.25 w/m?k': 'new build - average thermal transmittance', 'cavity wall, as built, insulated (assumed)': 'filled cavity', - 'average thermal transmittance 0.31 w/m?k': 'unknown', + 'average thermal transmittance 0.31 w/m?k': 'new build - average thermal transmittance', 'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity', - 'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown', - 'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown', - 'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown', - 'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m²k': 'unknown', - 'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown', - 'average thermal transmittance 0.18 w/m?k': 'unknown', - 'granite or whin, with internal insulation': 'granite or whinstone', - 'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown', - 'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown', - 'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown', - 'average thermal transmittance 
0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown', - 'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone', - 'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation", + 'average thermal transmittance 0.30 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.28 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.25 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.21 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.20 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.29 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.16 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.27 w/m²k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.15 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.23 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.18 w/m?k': 'new build - average thermal transmittance', + 'granite or whin, with internal insulation': 'insulated granite or whinstone', + 'average thermal transmittance 0.22 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.24 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.16 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.35 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.26 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.62 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.64 w/m?k': 'new build - average thermal transmittance', + 
'average thermal transmittance 0.61 w/m?k': 'new build - average thermal transmittance', + 'sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', + 'average thermal transmittance 0.33 w/m?k': 'new build - average thermal transmittance', + 'cavity wall,': "cavity unknown insulation", 'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity', - 'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown', - 'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown', - 'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown', - 'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown', - 'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown', - 'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown', - 'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown', - 'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown', - 'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown', - 'average thermal transmittance 0.28 w/m?k': 'unknown', + 'average thermal transmittance 0.29 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.32 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.19 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.27 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.22 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.38 w/m?k': 'new 
build - average thermal transmittance', + 'average thermal transmittance 0.26 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.27 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.18 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance = 0.27 w/m?k': 'new build - average thermal transmittance', + 'cavity wall, with external insulation': 'filled cavity', + 'average thermal transmittance 0.21 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.23 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.20 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.32 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.24 w/m-¦k': 'new build - average thermal transmittance', + 'cavity wall, with internal insulation': 'filled cavity', + 'average thermal transmittance 0.17 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.28 w/m?k': 'new build - average thermal transmittance', 'Cavity wall, filled cavity': 'filled cavity', 'Cavity wall, filled cavity and external insulation': 'filled cavity', - 'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone', + 'Granite or whinstone, as built, no insulation (assumed)': 'uninsulated granite or whinstone', 'Solid brick, as built, insulated (assumed)': 'insulated solid brick', 'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick', 'Solid brick, with external insulation': 'insulated solid brick', 'Solid brick, with internal insulation': 'insulated solid brick', - 'System built, as built, insulated (assumed)': 'system built', - 'System built, as built, no insulation (assumed)': 'system built', - 'System built, with external insulation': 'system built', - 'System built, with internal insulation': 'system 
built', - 'Timber frame, as built, insulated (assumed)': 'timber frame', - 'Timber frame, as built, no insulation (assumed)': 'timber frame', - 'Timber frame, as built, partial insulation (assumed)': 'timber frame', - 'Timber frame, with additional insulation': 'timber frame', + 'System built, as built, insulated (assumed)': 'insulated system built', + 'System built, as built, no insulation (assumed)': 'uninsulated system built', + 'System built, with external insulation': 'insulated system built', + 'System built, with internal insulation': 'insulated system built', + 'Timber frame, as built, insulated (assumed)': 'insulated timber frame', + 'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame', + 'Timber frame, as built, partial insulation (assumed)': 'insulated timber frame', + 'Timber frame, with additional insulation': 'insulated timber frame', 'CAVITY': 'cavity unknown insulation', 'COMB': 'unknown', 'NONE': 'unknown', 'NOTKNOWN': 'unknown', 'SOLID': 'solid brick unknown insulation', np.nan: 'unknown', - 'RENDER/TIMBER FRAME': 'timber frame', - 'SYSTEM BUILT': 'system built', + 'RENDER/TIMBER FRAME': 'timber frame unknown insulation', + 'SYSTEM BUILT': 'system built unknown insulation', 'PCC PANELS': 'other', 'NOT APPLICABLE - FLAT': 'unknown', - 'BRICK/TIMBER FRAME': 'timber frame', + 'BRICK/TIMBER FRAME': 'timber frame unknown insulation', 'BRICK/BLOCK CAVITY': 'cavity unknown insulation', - 'STONE SOLID': 'sandstone or limestone', - 'EXT CLADDING SYSTEM': 'system built', + 'STONE SOLID': 'sandstone or limestone unknown insulation', + 'EXT CLADDING SYSTEM': 'system built unknown insulation', 'BRICK/BLOCK SOLID': 'solid brick unknown insulation', - 'Cavity Filled cavity (with internal/external)': 'filled cavity', 'ND (inferred) Filled cavity': 'filled cavity', 'Cavity Filled cavity': 'filled cavity', 'Cavity Unknown insulation': 'cavity unknown insulation', - 'Timber frame As-built': 'timber frame', - 'System build Unknown 
insulation': 'system built', + 'Timber frame As-built': 'uninsulated timber frame', + 'System build Unknown insulation': 'system built unknown insulation', 'Cavity As-built': 'uninsulated cavity', - 'System build External': 'system built', + 'System build External': 'insulated system built', 'ND (inferred) ND (inferred)': 'unknown', 'Solid brick External': 'insulated solid brick', 'Cavity External': 'filled cavity', - 'System build As-built': 'system built', + 'System build As-built': 'uninsulated system built', 'Solid brick Internal': 'insulated solid brick', 'Cavity Internal': 'filled cavity', - 'System build Internal': 'system built', - 'Solid brick As-built': 'solid brick unknown insulation', - + 'System build Internal': 'insulated system built', + 'Solid brick As-built': 'uninsulated solid brick', 'Cavity ': 'cavity unknown insulation', 'Solid brick ': 'solid brick unknown insulation', 'Timber frame Timber frame (good insulation)': 'insulated timber frame', @@ -141,88 +182,156 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Cavity: Unknown': 'cavity unknown insulation', 'Cavity: AsBuilt (Post 1995)': 'filled cavity', 'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation', - 'SystemBuilt: AsBuilt': 'system built', - 'TimberFrame: AsBuilt': "timber frame unknown insulation", - 'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation', + 'SystemBuilt: AsBuilt': 'uninsulated system built', + 'TimberFrame: AsBuilt': "uninsulated timber frame", + 'Cavity: AsBuilt (1983-1995)': 'filled cavity', 'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity', - 'SolidBrick: AsBuilt': 'solid brick unknown insulation', + 'SolidBrick: AsBuilt': 'uninsulated solid brick', 'Cavity: FilledCavity': 'filled cavity', 'SolidBrick: Internal': 'insulated solid brick', 'Cavity: External': 'filled cavity', - 'Sandstone: Internal': 'sandstone or limestone', - 'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation', - 'System build': 'system built', + 'Sandstone: Internal': 'insulated 
sandstone or limestone', + 'Cavity: AsBuilt (Pre 1976)': 'uninsulated cavity', + 'System build': 'system built unknown insulation', 'Solid brick': 'solid brick unknown insulation', - 'Stone': 'sandstone or limestone', + 'Stone': 'sandstone or limestone unknown insulation', 'Timber frame': 'timber frame unknown insulation', '2017 onwards': 'new build - average thermal transmittance', 'ND (inferred)': 'unknown', - 'Flat / maisonette': 'other', - 'Other': 'other', + 'Flat / maisonette': 'unknown', + 'Other': 'unknown', 'Timber Frame': 'timber frame unknown insulation', 'Cavity Wall': 'cavity unknown insulation', - 'Non-Traditional': 'system built', - 'PRC': 'system built', - 'Cross Wall': 'system built', + 'Non-Traditional': 'system built unknown insulation', + 'PRC': 'system built unknown insulation', + 'Cross Wall': 'system built unknown insulation', 'Solid Wall': 'solid brick unknown insulation', 'Traditional': 'unknown', 'Solid': 'solid brick unknown insulation', - 'Wates no fines': 'system built', - 'Concrete Frame': 'system built', - 'PRCWATES': 'system built', - 'Refurbished Cornish': 'system built', + 'Wates no fines': 'system built unknown insulation', + 'Concrete Frame': 'system built unknown insulation', + 'PRCWATES': 'system built unknown insulation', + 'Refurbished Cornish': 'system built unknown insulation', 'Bailey Stratton': 'other', - 'Refurbished Reema': 'system built', - 'PRCREEMA': 'system built', - 'Trustsell Type': 'system built', + 'Refurbished Reema': 'system built unknown insulation', + 'PRCREEMA': 'system built unknown insulation', + 'Trustsell Type': 'system built unknown insulation', 'Petra Nissan': 'unknown', - 'Reinstated Airey': 'system built', - 'Refurbished Airey': 'system built', + 'Reinstated Airey': 'system built unknown insulation', + 'Refurbished Airey': 'system built unknown insulation', # From Abri- slightly unclear on types but not a large portion of the data - 'No Fines Type': 'system built', - 'Refurbished Unity': 'system 
built', + 'No Fines Type': 'system built unknown insulation', + 'Refurbished Unity': 'system built unknown insulation', 'Timber Framed': 'timber frame unknown insulation', - 'Refurbished Woolaway': 'system built', + 'Refurbished Woolaway': 'system built unknown insulation', 'Modern Methods of Construction': 'other', - 'BISF - Brit Iron & Steel Federation': 'system built', - 'Steel Framed': 'system built', + 'BISF - Brit Iron & Steel Federation': 'system built unknown insulation', + 'Steel Framed': 'system built unknown insulation', 'Timber Framed with confirmed Fire Stopping': 'timber frame unknown insulation', - 'Sipporex': 'system built', + 'Sipporex': 'system built unknown insulation', - 'Wates': 'system built', - 'Bryants': 'system built', - 'Gregory (Crosswall)': 'system built', - 'Rsmit': 'system built', - 'Dorman Long': 'system built', - 'Tarmac': 'system built', - 'RBIS': 'system built', - 'Five Oaks': 'system built', + 'Wates': 'system built unknown insulation', + 'Bryants': 'system built unknown insulation', + 'Gregory (Crosswall)': 'system built unknown insulation', + 'Rsmit': 'system built unknown insulation', + 'Dorman Long': 'system built unknown insulation', + 'Tarmac': 'system built unknown insulation', + 'RBIS': 'system built unknown insulation', + 'Five Oaks': 'system built unknown insulation', 'Not known': 'unknown', - 'Smiths': 'system built', - 'Kendrick': 'system built', - 'IDC': 'system built', - 'Wimpey (Part Brick)': 'system built', - 'Whitehall': 'system built', - 'Wimpey': 'system built', - 'Bison': 'system built', - 'Zinns': 'system built', - 'Bisf': 'system built', - 'Integer': 'system built', - 'Cornish': 'system built', - 'Rwate': 'system built', - 'Hill Presweld Steel': 'system built', + 'Smiths': 'system built unknown insulation', + 'Kendrick': 'system built unknown insulation', + 'IDC': 'system built unknown insulation', + 'Wimpey (Part Brick)': 'system built unknown insulation', + 'Whitehall': 'system built unknown insulation', + 
'Wimpey': 'system built unknown insulation', + 'Bison': 'system built unknown insulation', + 'Zinns': 'system built unknown insulation', + 'Bisf': 'system built unknown insulation', + 'Integer': 'system built unknown insulation', + 'Cornish': 'system built unknown insulation', + 'Rwate': 'system built unknown insulation', + 'Hill Presweld Steel': 'system built unknown insulation', 'Cavity Filled Cavity': 'filled cavity', 'Cavity Unknown': 'cavity unknown insulation', 'Cavity Filled Cavity (internal)': 'filled cavity', '': 'unknown', 'Cavity Internal Insulation': 'filled cavity', 'Cavity As Built': "uninsulated cavity", - 'Non Trad Large Panel System': 'system built', - 'Non Trad Cornish': 'system built', - 'Non Trad Reema': 'system built', + 'Non Trad Large Panel System': 'system built unknown insulation', + 'Non Trad Cornish': 'system built unknown insulation', + 'Non Trad Reema': 'system built unknown insulation', 'Traditional Cavity Brickwork': 'cavity unknown insulation', - 'System build (undefined)': 'system built', - 'Non Trad Wimpey': 'system built', - 'Non Trad Wates': 'system built' + 'System build (undefined)': 'system built unknown insulation', + 'Non Trad Wimpey': 'system built unknown insulation', + 'Non Trad Wates': 'system built unknown insulation', + 'CAVITY FILLED 270MM': 'filled cavity', + 'CAVITY FILLED 270MM': 'filled cavity', + 'CAVITY FILLED 250MM': 'filled cavity', + 'CAVITY FILLED 260MM': 'filled cavity', + 'CAVITY FILLED 260MM': 'filled cavity', + 'SOLID A/B 220MM': 'solid brick unknown insulation', + 'CAVITY A/B 300MM': "uninsulated cavity", + 'CAVITY A/B 250MM': "uninsulated cavity", + 'CAVITY A/B 260MM': "uninsulated cavity", + 'CAVITY A/B 270MM': "uninsulated cavity", + 'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation', + 'CAVITY EWI': 'filled cavity', + 'SANDSTONE/CAVITY EXT': 'sandstone or limestone unknown insulation', + 'SYSTEM BUILD 100MM EWI': 'insulated system built', + 'CAVITY A/B 260MM': "uninsulated cavity", + 'CAVITY 
A/B 270MM': "uninsulated cavity", + 'CAVITY A/B 250MM': "uninsulated cavity", + 'System': 'system built unknown insulation', + 'Sandstone/Limestone': 'sandstone or limestone unknown insulation', + 'No Fines': 'system built unknown insulation', + 'Granite/Whinstone': 'granite or whinstone unknown insulation', + 'Not applicable to this asset type': 'unknown', + 'Steel Frame': 'system built unknown insulation', + 'Solid Wall As Built': 'uninsulated solid brick', + 'Solid As Built': 'uninsulated solid brick', + 'Cavity: FilledCavity, Cavity: Unknown': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), TimberFrame: Unknown': 'uninsulated cavity', + 'SolidBrick: AsBuilt, SolidBrick: Unknown': 'uninsulated solid brick', + 'Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), SolidBrick: Unknown': 'uninsulated cavity', + 'Cavity: FilledCavity, TimberFrame: Unknown': 'filled cavity', + 'Cavity: AsBuilt (1976-1982), Cavity: Unknown': 'uninsulated cavity', + 'Cavity: Unknown, SolidBrick: AsBuilt': 'cavity unknown insulation', + 'Cavity: AsBuilt (1976-1982), Cavity: FilledCavity': 'filled cavity', + 'Cavity: External, Cavity: FilledCavity': 'filled cavity', + 'Cavity: AsBuilt (Post 1995), TimberFrame: AsBuilt': 'filled cavity', + 'TimberFrame: AsBuilt, TimberFrame: Internal': 'timber frame unknown insulation', + 'GraniteOrWhinstone: AsBuilt': 'uninsulated granite or whinstone', + 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), Cavity: FilledCavity': 'filled cavity', + 'SolidBrick: AsBuilt, SolidBrick: External': 'insulated solid brick', + 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity': 'filled cavity', + 'Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity', + 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), SolidBrick: AsBuilt': 'uninsulated cavity', + 'Cavity: AsBuilt (1976-1982), 
SolidBrick: AsBuilt': 'filled cavity', + + 'Cavity: FilledCavity, SolidBrick: AsBuilt': 'filled cavity', + 'SolidBrick: External': 'insulated solid brick', + 'Cavity: FilledCavity, Cavity: Internal': 'filled cavity', + 'Cavity: External, SolidBrick: AsBuilt': 'filled cavity', + 'SolidBrick: AsBuilt, TimberFrame: AsBuilt': 'uninsulated solid brick', + 'Cavity: FilledCavity, SystemBuilt: AsBuilt': 'filled cavity', + 'Cavity: AsBuilt (1976-1982), SystemBuilt: AsBuilt': 'system built', + 'Cavity: AsBuilt (Post 1995), SolidBrick: AsBuilt': 'filled cavity', + 'Cavity: AsBuilt (1983-1995), TimberFrame: AsBuilt': 'filled cavity', + 'SystemBuilt: AsBuilt, TimberFrame: AsBuilt': 'uninsulated system built', + 'TimberFrame: Internal': 'insulated timber frame', + 'Cavity: Internal': 'filled cavity', + 'SystemBuilt: External': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), SystemBuilt: AsBuilt': 'uninsulated cavity', + 'SystemBuilt: Internal': 'insulated system built', + 'Cavity: AsBuilt (1983-1995), SolidBrick: AsBuilt': 'solid brick unknown insulation', + 'Cavity: AsBuilt (Pre 1976), TimberFrame: AsBuilt': 'timber frame unknown insulation', + 'SolidBrick: AsBuilt, SolidBrick: Internal': 'uninsulated solid brick', + 'Cavity: FilledCavity, TimberFrame: AsBuilt': 'filled cavity', + 'Cavity: FilledCavity, SolidBrick: AsBuilt, SolidBrick: Internal': 'filled cavity', + 'Cavity: Internal, SolidBrick: AsBuilt': 'filled cavity', } diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt index 99943397..b68706be 100644 --- a/asset_list/requirements.txt +++ b/asset_list/requirements.txt @@ -6,7 +6,10 @@ epc-api-python==1.0.2 thefuzz boto3 openpyxl -openai +openai>=1.3.5 tiktoken msgpack -beautifulsoup4 \ No newline at end of file +beautifulsoup4 +pydantic>=1.10.7 +typing-extensions>=4.5.0 +requests>=2.28.2 diff --git a/asset_list/utils.py b/asset_list/utils.py index ff9db3f8..fe2b7d14 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -1,5 +1,5 @@ import time 
-import numpy as np +import random import pandas as pd from backend.SearchEpc import SearchEpc from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc @@ -37,7 +37,9 @@ def get_data( "mid-terrace": "Mid-Terrace", "end-terrace": "End-Terrace", "semi-detached": "Semi-Detached", - "detached": "Detached" + "detached": "Detached", + "enclosed end-terrace": "End-Terrace", + "enclosed mid-terrace": "Mid-Terrace", } epc_data = [] @@ -79,7 +81,13 @@ def get_data( uprn=uprn ) # Force the skipping of estimating the EPC - searcher.ordnance_survey_client.property_type = None + # We check if the property was split + if home.get("is_expended_block"): + searcher.ordnance_survey_client.property_type = "Flat" + searcher.property_type = "Flat" + searcher.set_strict_property_type_search() + else: + searcher.ordnance_survey_client.property_type = None searcher.ordnance_survey_client.built_form = None searcher.find_property(skip_os=True) @@ -95,7 +103,6 @@ def get_data( else: # Try splitting on space add1 = full_address.split(" ")[0].strip() - else: add1 = str(house_number) searcher = SearchEpc( @@ -166,7 +173,7 @@ def get_data( find_epc_data = {} except Exception as e: raise Exception(f"Error retrieving FindMyEPC data: {e}") - time.sleep(np.random.uniform(0.1, 1)) + time.sleep(random.sample(range(50, 100), 1)[0] / 100) epc = { row_id_name: home[row_id_name], @@ -176,6 +183,11 @@ def get_data( } epc_data.append(epc) + + if len(epc_data) % 50 == 0 and len(epc_data) > 0: + logger.info("Sleeping for 10 seconds to avoid hitting API rate limit") + time.sleep(10) + except Exception as e: errors.append(home[row_id_name]) time.sleep(5) diff --git a/backend/Funding.py b/backend/Funding.py index f5f85b9f..49d2d293 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -5,7 +5,7 @@ from typing import List from backend.app.plan.schemas import HousingType -class Funding: +class FundingOld: """ Given a property, this class identifies if the home is possibly eligible for funding under the 
class Funding:
    """
    Estimates the funding available for a package of retrofit measures.

    Only the ECO4 route for privately rented properties is implemented. The starting SAP score, finishing SAP
    score and floor area are mapped onto bands, the bands are looked up in the ECO4 full project scores matrix
    and the resulting score is multiplied by the relevant rate.

    :param tenure: tenure of the property, compared against "Private" and "Social"
    :param social_cavity_abs_rate: rate for social, cavity wall properties (stored, not used yet)
    :param social_solid_abs_rate: rate for social, solid wall properties (stored, not used yet)
    :param private_cavity_abs_rate: rate multiplied by the project score for private, cavity wall properties
    :param private_solid_abs_rate: rate multiplied by the project score for private, non-cavity properties
    :param project_scores_matrix: pandas DataFrame with the columns "Floor Area Segment", "Starting Band",
        "Finishing Band" and "Cost Savings"
    :param whlg_eligible_postcodes: warm homes local grant postcode data (stored, not used yet)
    """

    def __init__(
            self,
            tenure: "HousingType",
            social_cavity_abs_rate: float,
            social_solid_abs_rate: float,
            private_cavity_abs_rate: float,
            private_solid_abs_rate: float,
            project_scores_matrix,
            whlg_eligible_postcodes
    ):
        self.tenure = tenure
        self.social_cavity_abs_rate = social_cavity_abs_rate
        self.social_solid_abs_rate = social_solid_abs_rate
        self.private_cavity_abs_rate = private_cavity_abs_rate
        self.private_solid_abs_rate = private_solid_abs_rate

        # The bands are populated by check_funding, before the project scores matrix is queried
        self.starting_sap_band = None
        self.ending_sap_band = None
        self.floor_area_band = None
        self.project_scores_matrix = project_scores_matrix
        self.whlg_eligible_postcodes = whlg_eligible_postcodes

    @staticmethod
    def get_sap_band(sap_score_number):
        """
        Maps a SAP score onto its half band, e.g. 54 -> "High_E"
        :param sap_score_number: numeric SAP score
        :return: the band name, or None if the score is below 1 and therefore falls outside every band
        """
        # Each band covers lower <= score < upper
        bands = [
            ("High_A", 96, float("inf")),
            ("Low_A", 92, 96),
            ("High_B", 86, 92),
            ("Low_B", 81, 86),
            ("High_C", 74.5, 81),
            ("Low_C", 69, 74.5),
            ("High_D", 61.5, 69),
            ("Low_D", 55, 61.5),
            ("High_E", 46.5, 55),
            ("Low_E", 39, 46.5),
            ("High_F", 29.5, 39),
            ("Low_F", 21, 29.5),
            ("High_G", 10.5, 21),
            ("Low_G", 1, 10.5),
        ]

        for band, lower, upper in bands:
            if lower <= sap_score_number < upper:
                return band

        return None

    @staticmethod
    def get_floor_area_band(floor_area):
        """
        Maps a floor area onto the floor area segment used to query the project scores matrix
        :param floor_area: total floor area of the property
        :return: one of "0-72", "73-97", "98-199" or "200+"
        """
        if floor_area <= 72:
            return "0-72"

        if floor_area <= 97:
            return "73-97"

        if floor_area <= 199:
            return "98-199"

        # NOTE(review): this previously returned "200", while the equivalent helper used for the Futures Housing
        # validation surveys returns "200+" - confirm "200+" is the label used in the project scores matrix
        return "200+"

    @staticmethod
    def eco4_prs_eligibility(
            starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
    ):
        """
        Handles the eligibility criteria for private rental properties under eco
        :param starting_sap: SAP score of the property before any works
        :param measures: measure types included in the package, e.g. "solar_pv"
        :param mainheat_description: description of the main heating system, matched case-insensitively
        :param heating_control_description: description of the heating controls, matched case-insensitively
        :return: True if the package meets the criteria checked here, otherwise False
        """

        # Help to heat group
        # 1) EPC E - G
        # 2) Must receive one of SWI, FTCH, renewable heating or DHC
        # 3) Tenant must be on benefits

        # We don't consider the tenant being on benefits - we just notify the end user that this is a requirement

        meets_epc = starting_sap <= 54
        has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures

        # Both descriptions are lower-cased, as the values we compare against are lower case. A missing
        # description is treated as an empty one
        mainheat = (mainheat_description or "").lower()
        heating_controls = (heating_control_description or "").lower()

        # We check if the property has a heating system that means solar pv counts as a renewable heating system
        has_eligible_electric_heating = any(
            heating_system in mainheat for heating_system in (
                "air source heat pump", "ground source heat pump", "boiler and radiators, electric"
            )
        ) or (
            "electric storage heaters" in mainheat and
            heating_controls == "controls for high heat retention storage heaters"
        )

        # Counts as renewable heating
        solar_renewable_heating = has_eligible_electric_heating and "solar_pv" in measures
        # Is a renewable heating
        ashp = "air_source_heat_pump" in measures

        return bool(meets_epc and (solar_renewable_heating or ashp or has_solid_wall))

    def calculate_full_project_abs(self):
        """
        Looks up the full project score for the bands currently stored on the instance
        :return: the "Cost Savings" value of the first matching row in the project scores matrix
        :raises ValueError: if the matrix has no row for the floor area, starting and finishing bands
        """
        matrix = self.project_scores_matrix

        # Filter the project scores matrix
        data = matrix[
            (matrix["Floor Area Segment"] == self.floor_area_band) &
            (matrix["Starting Band"] == self.starting_sap_band) &
            (matrix["Finishing Band"] == self.ending_sap_band)
        ]

        if data.empty:
            raise ValueError("Missing abs rate, check the project scores matrix")

        return data["Cost Savings"].values[0]

    def check_funding(
            self, measures: List,
            starting_sap: int,
            ending_sap: int,
            floor_area: float,
            mainheat_description: str,
            heating_control_description: str,
            is_cavity: bool
    ):
        """
        Given a list of measures, this function will check if the package of measures is fundable
        :param measures: measure types included in the package
        :param starting_sap: SAP score before the works
        :param ending_sap: SAP score after the works
        :param floor_area: total floor area of the property
        :param mainheat_description: description of the main heating system
        :param heating_control_description: description of the heating controls
        :param is_cavity: Indicates if the property has cavity wall insulation
        :return: the estimated ECO4 funding for a private property, or None if the package is not ECO4 eligible
        :raises NotImplementedError: if the SAP movement is insufficient or not supported yet, or if the tenure
            is anything other than "Private"
        :raises ValueError: if the project scores matrix has no entry for the property
        """

        # A property starting at SAP 55 or above (EPC D or better) has to finish at SAP 69 or above (EPC C)
        if starting_sap >= 55 and ending_sap < 69:
            raise NotImplementedError("This property doesn't have sufficient SAP movement")

        # F or G should get to D. This has to be `and`: `&` binds tighter than the comparisons, so the previous
        # expression compared starting_sap against `38 & ending_sap`
        if starting_sap <= 38 and ending_sap <= 55:
            raise NotImplementedError("Implement F or G to D eligibility")

        self.starting_sap_band = self.get_sap_band(starting_sap)
        self.ending_sap_band = self.get_sap_band(ending_sap)
        self.floor_area_band = self.get_floor_area_band(floor_area)

        ########################
        # Private
        ########################
        # 1) ECO4
        # 2) GBIS

        if self.tenure == "Private":
            is_eco4_eligible = self.eco4_prs_eligibility(
                starting_sap=starting_sap,
                measures=measures,
                mainheat_description=mainheat_description,
                heating_control_description=heating_control_description
            )

            # GBIS still needs to be implemented:
            # 1) Package has to include an insulation measure
            # 2) We should use the funding for the measure that has the largest partial project score

            if not is_eco4_eligible:
                return None

            eco4_abs = self.calculate_full_project_abs()
            # We estimate rates now
            abs_rate = self.private_cavity_abs_rate if is_cavity else self.private_solid_abs_rate

            return eco4_abs * abs_rate

        ########################
        # Social
        ########################
        # 1) ECO4
        # 2) GBIS

        if self.tenure == "Social":
            raise NotImplementedError("Funding for Social housing has not been implemented yet")

        raise NotImplementedError("Only implemented for Private or Social housing")
self.main_fuel["fuel_type"] == "mains gas": + if self.main_fuel["fuel_type"] in ["mains gas", None]: # We assume when None as it's unknown self.heating_energy_source = "Natural Gas (Community Scheme)" else: raise Exception("Implement me") @@ -1233,6 +1236,13 @@ class Property: if "air_source_heat_pump" not in measures: return False + # If we have a house over a floor area threshold, we recommend an ASHP + if ( + self.data["property-type"] in ["House", "Bungalow"] and + self.floor_area > assumptions.ASHP_FLOOR_AREA_THRESHOLD + ): + return True + suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [ "Detached", "Semi-Detached", "End-Terrace", ] @@ -1342,3 +1352,12 @@ class Property: self.gbis_eligibiltiy = funding_calulator.gbis_eligibiltiy self.eco4_eligibility = funding_calulator.eco4_eligibility self.whlg_eligibility = funding_calulator.whlg_eligibility + + def identify_ventilation(self): + + ventilation_descriptions = [ + 'mechanical, extract only', + 'mechanical, supply and extract' + ] + + return self.data["mechanical-ventilation"] in ventilation_descriptions diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 0010191a..16dd8f04 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -160,6 +160,9 @@ class SearchEpc: """ Address lines 1 and postcode are mandatory fields. 
The other address lines are optional but can be used to find the epc for the home, if address1 and postcode are insufficient + + If you wish to run a strict property type search, please run set_strict_property_type_search() + :param address1: string, propery's address line 1 :param postcode: string, propery's postcode :param full_address: string, optional parameter, the full address of the property @@ -189,6 +192,7 @@ class SearchEpc: self.older_epcs = None self.full_sap_epc = None self.metadata = None + self.strict_property_type_search = False # These are the address and postcode values, which we store in the database self.address_clean = None @@ -199,6 +203,14 @@ class SearchEpc: self.property_type = property_type self.fast = fast + def set_strict_property_type_search(self): + """ + This method sets the strict property type search flag to True. When this flag is set, the search will + only return results that match the specified property type. + :return: + """ + self.strict_property_type_search = True + @staticmethod def get_house_number(address: str, postcode=None) -> str | None: """ @@ -315,6 +327,8 @@ class SearchEpc: address_params["address"] = self.address1 if self.postcode: address_params["postcode"] = self.postcode + if self.strict_property_type_search and self.property_type: + address_params["property-type"] = self.property_type.lower() # We attempt the search with uprn params @@ -365,11 +379,16 @@ class SearchEpc: unique_property_types = {r["property-type"] for r in rows} + is_just_a_house = (len(unique_property_types) == 1) & ( + ("House" in unique_property_types) | ("Bungalow" in unique_property_types) + ) + # We allow for variation in property type across flats/maisonettes # If we know that we have a flat/maisonette, we allow for both property types - if property_type in ["Flat", "Maisonette"]: - if ((len(uprns) == 1) and ((len(unique_property_types) == 1) - ) or unique_property_types == {"Flat", "Maisonette"}): + # Make sure we have not JUST a house, 
or not JUST a flat/maisonette + if property_type in ["Flat", "Maisonette"] and not is_just_a_house: + if (((len(uprns) == 1) and ((len(unique_property_types) == 1) + ) or unique_property_types == {"Flat", "Maisonette"})): return rows if property_type is not None: @@ -424,6 +443,8 @@ class SearchEpc: return rows + raise ValueError("property type and address cannot both be None, at least one must be provided") + @staticmethod def format_address(newest_epc): """ @@ -702,6 +723,18 @@ class SearchEpc: exclude_old=exclude_old ) + # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build + # so we avoid comparing it to new builds + # TODO - this is experimental + newer_age_bands = [ + "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011", + "England and Wales: 2012 onwards" + ] + + if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum(): + # We have some older age bands, so we need to filter them out + epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy() + # If we have missing lodgment date, we fill it with inspection-date epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"]) # If we still have missing dates, we set it to the mean of the non NA dates diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index f1090ef3..d36266d3 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -58,6 +58,19 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1}, "Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85}, "From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85}, + "Room heaters, coal": {"fuel": "Coal", "cop": 0.85}, + "Electric underfloor heating, Electric storage heaters": {"fuel": "Electricity", "cop": 1}, + 'Room heaters, electric, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85}, + 
'Boiler and radiators, mains gas, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85}, + 'Room heaters, electric, Electric storage heaters': {"fuel": "Electricity", "cop": 1}, + "Boiler and radiators, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85}, + "Boiler and radiators, anthracite": {"fuel": "Anthracite", "cop": 0.85}, + 'Electric immersion, off-peak, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1}, + 'Ground source heat pump, radiators, electric': { + "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100 + }, + 'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1}, + "Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it @@ -65,3 +78,6 @@ DESCRIPTIONS_TO_FUEL_TYPES = { measures_needing_ventilation = [ "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation" ] + +# If we have a property beyond this size, we assume it's likely large enough to have an ASHP +ASHP_FLOOR_AREA_THRESHOLD = 120 # m2 diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 48300f2a..6b8b192d 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -96,3 +96,7 @@ class PlanTriggerRequest(BaseModel): # When performing a remote assessment, if this has been set, it will allow the engine to # pull data from the find my epc website, to utilise as part of a remote assessment event_type: Optional[Literal["remote_assessment"]] = None + + # If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing + # scores to drop by a few points + simulate_sap_10: Optional[bool] = False diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 58c3dc8e..d631e349 100644 --- a/backend/engine/engine.py +++ 
b/backend/engine/engine.py @@ -30,7 +30,6 @@ import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi from backend.Property import Property -from backend.Funding import Funding from backend.apis.GoogleSolarApi import GoogleSolarApi from recommendations.optimiser.CostOptimiser import CostOptimiser @@ -507,7 +506,7 @@ async def model_engine(body: PlanTriggerRequest): ) # if we have a remote assment data type, we pull the additional data and include it - if body.event_type == "remote_assessment": + if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): logger.info("Retrieving find my epc data") try: property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( @@ -728,7 +727,8 @@ async def model_engine(body: PlanTriggerRequest): # Additionally, if we have required measures, they should also be included. Therefore # we can discount the number of points required to get to the target SAP band (or increase) # in the case of ventilation - needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation) + needs_ventilation = any( + x in property_measure_types for x in assumptions.measures_needing_ventilation) and not p.has_ventilation input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation) @@ -772,6 +772,10 @@ async def model_engine(body: PlanTriggerRequest): epc_to_sap_lower_bound(body.goal_value) - current_sap_points ) - fixed_gain + if body.simulate_sap_10: + # We add 3 additional SAP points to the required gain to account for SAP 10 + sap_gain += 3 + if not body.optimise: if body.goal != "Increasing EPC": raise NotImplementedError("Only EPC optimisation is currently supported") @@ -826,7 +830,11 @@ async def model_engine(body: PlanTriggerRequest): ) # If wall insulation is selected, we also include mechanical ventilation as a best practice measure - if any(x in [r["type"] for r in solution] for x in 
assumptions.measures_needing_ventilation): + ventilation_selected = [ + r for r in solution if "+mechanical_ventilation" in r["type"] + ] + if (any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation) or + len(ventilation_selected)): ventilation_rec = next( (r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"), None diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index b22837d8..4291b1d1 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -28,8 +28,8 @@ class AnnualBillSavings: # Latest price cap figures from Ofgem are for April 2024 # https://www.ofgem.gov.uk/energy-price-cap - ELECTRICITY_PRICE_CAP = 0.2486 - GAS_PRICE_CAP = 0.0634 + ELECTRICITY_PRICE_CAP = 0.2573 + GAS_PRICE_CAP = 0.0633 # This is the most recent export payment figure, at 9.28p/kWh # Smart export guarantee rates can be found here: # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates @@ -39,8 +39,8 @@ class AnnualBillSavings: PRICE_FACTOR = 0.09549999999999999 # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT - DAILY_STANDARD_CHARGE_GAS = 0.3165 - DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097 + DAILY_STANDARD_CHARGE_GAS = 0.2982 + DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137 # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison # For July 2024. 
These quotes are based on the east midlands region, so we diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py new file mode 100644 index 00000000..311ab589 --- /dev/null +++ b/backend/tests/test_funding.py @@ -0,0 +1,52 @@ +import pytest +import pandas as pd +from utils.s3 import read_csv_from_s3 +from backend.Funding import Funding + + +def get_funding_data(): + """ + This function retrieves the eco project scores matrix and the warm homes local grant funding data + :return: + """ + project_scores_matrix = read_csv_from_s3( + bucket_name="retrofit-data-dev", + filepath="funding/ECO4 Full Project Scores Matrix.csv", + ) + project_scores_matrix = pd.DataFrame(project_scores_matrix) + project_scores_matrix.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings'] + project_scores_matrix["Cost Savings"] = project_scores_matrix["Cost Savings"].astype(float) + + whlg_eligible_postcodes = read_csv_from_s3( + bucket_name="retrofit-data-dev", + filepath="funding/whlg eligible postcodes.csv", + ) + whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + + return project_scores_matrix, whlg_eligible_postcodes + + +class TestFunding: + + def test_prs(self): + eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() + funding = Funding( + project_scores_matrix=eco_project_scores_matrix, + whlg_eligible_postcodes=whlg_eligible_postcodes, + social_cavity_abs_rate=13.5, + social_solid_abs_rate=17, + private_cavity_abs_rate=13.5, + private_solid_abs_rate=17, + tenure="Private", + ) + + measures_1 = ["internal_wall_insulation", "solar_pv"] + funding.check_funding( + measures=measures_1, + starting_sap=54, + ending_sap=69, + floor_area=73, + mainheat_description="Boiler and radiators, mains gas", + heating_control_description="Programmer, room thermostat and TRVs", + is_cavity=True + ) diff --git a/etl/customers/Brentwood/compile_new_asset_list.py b/etl/customers/Brentwood/compile_new_asset_list.py new file 
mode 100644 index 00000000..e3ced5ab --- /dev/null +++ b/etl/customers/Brentwood/compile_new_asset_list.py @@ -0,0 +1,38 @@ +""" +Brentwood sent us a new asset list in July 2025. This script will combine the data in the new asset list with the +old, so we have a single picture +""" + +import pandas as pd + +new_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/All Assets " + "29.05.2025.xlsx", + sheet_name="Sheet1", + header=1 +) + +old_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/BRENTWOOD Asset " + "list.xlsx", + sheet_name="Asset List" +) + +# We combine based on the data we want +compiled = new_asset_list.merge( + old_asset_list[["UPRN", "Asset Type", "Year Built", "Dwelling", "Bedrooms", "Ownership", 'Asbestos Full Survey', + 'Stock Condition Survey', 'Cat', 'Heating', + 'WFT Findings', 'ECO Eligibility', 'CIGA Requested', 'CIGA Guarantee', + 'ECO Survey completed']], + how="left", + on="UPRN" +) + +compiled["WFT Findings"] = compiled["WFT Findings"].fillna("Not Inspected") + +# Store this data +compiled.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/20250710 Asset List " + "Brentwood.xlsx", + index=False +) diff --git a/etl/customers/Colchester/July 2025 Finalised Route.py b/etl/customers/Colchester/July 2025 Finalised Route.py new file mode 100644 index 00000000..f3ecf2d9 --- /dev/null +++ b/etl/customers/Colchester/July 2025 Finalised Route.py @@ -0,0 +1,54 @@ +import pandas as pd + +comments_df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/CBH_RetroTeamList_amended_25-06-05.xlsx", +) + +cavity_route = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/20250708 Colchester Borough Homes- Standardised.xlsx", + sheet_name="July 2025 
Route - Cavity" +) + +solar_route = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/20250708 Colchester Borough Homes- Standardised.xlsx", + sheet_name="July 2025 Route - Solar" +) + +# Merge on the comments +comments = comments_df[ + ["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)'] +].copy() + +cavity_route = cavity_route.merge( + comments, left_on="landlord_property_id", right_on="URPN", how="left" +) +solar_route = solar_route.merge( + comments, left_on="landlord_property_id", right_on="URPN", how="left" +) + +# Get properties that are not on either route +not_on_routes = comments_df[ + ~comments_df["URPN"].isin(cavity_route["landlord_property_id"]) & + ~comments_df["URPN"].isin(solar_route["landlord_property_id"]) + ] + +# Store +not_on_routes.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/Properties not on routes.xlsx", + index=False +) +# Save the routes +cavity_route.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/Cavity Route.xlsx", + index=False +) +solar_route.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/Solar Route.xlsx", + index=False +) diff --git a/etl/customers/Futures Housing/validation_surveys.py b/etl/customers/Futures Housing/validation_surveys.py new file mode 100644 index 00000000..1f8e6cfa --- /dev/null +++ b/etl/customers/Futures Housing/validation_surveys.py @@ -0,0 +1,167 @@ +import pandas as pd + + +def get_band(sap_score_number): + bands = [ + ("High_A", 96, float("inf")), + ("Low_A", 92, 96), + ("High_B", 86, 92), + ("Low_B", 81, 86), + ("High_C", 74.5, 81), + ("Low_C", 69, 74.5), + ("High_D", 61.5, 69), + ("Low_D", 55, 61.5), + ("High_E", 46.5, 55), + ("Low_E", 39, 46.5), + ("High_F", 29.5, 39), + ("Low_F", 21, 29.5), + ("High_G", 10.5, 21), + 
("Low_G", 1, 10.5), + ] + + for band, lower, upper in bands: + if lower <= sap_score_number < upper: + return band + + return None + + +def classify_floor_area(floor_area): + if floor_area <= 72: + return "0-72" + + if floor_area <= 97: + return "73-97" + + if floor_area <= 199: + return "98-199" + + return "200+" + + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx", + sheet_name="Standardised Asset List" +) + +asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area) + +# Objective: +# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below +# +# Therefore: +# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying +# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do +# qualify +# 2) We cannot survey everything, so before we undetake too much risk we should produce some costings for each of the +# archetypes +# +# Driving Factors: +# 1) Floor area band & starting SAP band - this will determine how much funding is produced +# 2) Heating system - this will determine if the property needs a heating upgrade or not + + +archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby( + ["floor_area_band", "starting_sap_band", "landlord_heating_system"] +)["landlord_property_id"].nunique().reset_index() +archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"}) +archetypes = archetypes.sort_values("n_properties", ascending=False) +archetypes["running_total"] = archetypes["n_properties"].cumsum() +archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100 + +archetypes["is_electric"] = 
archetypes["landlord_heating_system"] != "boiler - other fuel" +archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin( + ["boiler - other fuel", "electric storage heaters"] +) +archetypes = archetypes.reset_index(drop=True) + +# Right now, they don't want to treat the oil properties so we'll exclude them for the moment +electric_heated_archetypes = ( + archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True) +) +electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum() +electric_heated_archetypes["cumulative_percentage"] = ( + electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100 +) + +# The main properties that need validation surveys are properties that require a heating upgrade +electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]] +electric_heated_archetypes = electric_heated_archetypes.merge( + archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]], + how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"] +) + +oil_archetypes = archetypes[ + archetypes["landlord_heating_system"] == "boiler - other fuel" + ].copy().reset_index(drop=True) + +archetypes["archetype_id"] = archetypes.index + +asset_list = asset_list.merge( + archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]], + how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"] +) + +properties_for_verification = asset_list[ + asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values) +].copy() +properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[ + 0].str.strip() + +properties_for_verification["epc_age"] = ( + pd.Timestamp.now() - 
pd.to_datetime(properties_for_verification["epc_inspection_date"]) +).dt.days + +# We also survey 2 oil heater properties, so we take the 2 most prevelant archetypes +archetypes_for_survey = pd.concat( + [electric_heated_archetypes, oil_archetypes.head(2)] +) + +# Take the property with the oldest EPC, by region. Prioritise estimated properties +sample = [] +for _, config in archetypes_for_survey.iterrows(): + properties = asset_list[ + (asset_list["archetype_id"] == config["archetype_id"]) & + (asset_list["floor_area_band"] == config["floor_area_band"]) & + (asset_list["starting_sap_band"] == config["starting_sap_band"]) + ] + + if pd.isnull(properties["epc_inspection_date"]).sum(): + sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records") + else: + # Take the property with the oldest EPC + sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records") + + sample.extend(sample_property) + +sample = pd.DataFrame(sample) + +sample = sample[ + [ + "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band", + "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id" + ] +] + +archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy() +archetypes["archetype_id"] = archetypes["archetype_id"].astype(str) + +filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx" +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + +with pd.ExcelWriter(filename) as writer: + archetypes.to_excel(writer, sheet_name="Archetypes", index=False) + sample.to_excel(writer, sheet_name="Survey Sample", index=False) + +# We store this + +# Questions: +# 1) If futures are considering changing properties that have oil heating systems, we could include them and +# we have 39 total archetypes. 
Otherwise, we have 25 archetypes +# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're +# using + +# Recommendations: +# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover +# diff --git a/etl/customers/acis/solid_wall_funding.py b/etl/customers/acis/solid_wall_funding.py new file mode 100644 index 00000000..5515b29c --- /dev/null +++ b/etl/customers/acis/solid_wall_funding.py @@ -0,0 +1,144 @@ +import os +import pandas as pd +import numpy as np +from dotenv import load_dotenv +from etl.find_my_epc.AssetListEpcData import AssetListEpcData +from backend.Funding import Funding +from backend.app.utils import sap_to_epc +from recommendations.recommendation_utils import estimate_external_wall_area + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) +pps_matrix = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx", + header=1 +) +pps_matrix.columns = [c.strip() for c in pps_matrix.columns] + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties - Standardised_2.xlsx", + sheet_name="Standardised Asset List" +) + +asset_list = asset_list.rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode"} +) +asset_list["address"] = asset_list["address"].astype(str) + +# Pull the find my EPC data and get the SAP points for solid wall +asset_list_epc_client = AssetListEpcData( + asset_list=asset_list, + epc_auth_token=EPC_AUTH_TOKEN +) +asset_list_epc_client.get_data() +asset_list_epc_client.get_non_invasive_recommendations() +# We pull out solid wall insulation +solid_wall_sap_points = [] +for r in asset_list_epc_client.non_invasive_recommendations: + solid_recommendations = [ + x for x in 
r["recommendations"] if ("internal_wall_insulation" in x["type"]) or ( + "external_wall_insulation" in x["type"] + ) + ] + if solid_recommendations: + solid_recommendations = solid_recommendations[0] + else: + continue + + address = r["address"] + postcode = r["postcode"] + + solid_wall_sap_points.append( + { + "address": address, + "postcode": postcode, + "sap_points": solid_recommendations["sap_points"] + } + ) + +solid_wall_sap_points = pd.DataFrame(solid_wall_sap_points) +avg_points = solid_wall_sap_points["sap_points"].median() + +asset_list = asset_list.merge(solid_wall_sap_points, how="left", on=["address", "postcode"]) +asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_points) +asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"] +asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x)) +asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x)) + +asset_list["funding_scheme"] = np.where( + ( + (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]) + ), + "GBIS", + "ECO4" +) + +# Merge on the ABS matrix +asset_list = asset_list.merge( + abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"], + right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ] +) +asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment']) + +# store for backup +# asset_list.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties - +# Standardised_2_with_funding.csv", +# index=False +# ) + +# For GBIS, we use the PPS +# Almost all properties are gas + +# Using IWI solid 1.7 -> 0.3 
rates +pps_matrix = pps_matrix[ + pps_matrix["Measure_Type"].isin(["IWI_solid_1.7_0.3"]) +] + +# Merge on +asset_list = asset_list.merge( + pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename( + columns={ + "Cost Savings": "partial_project_score", + "Starting Band": "starting_half_band", + "Total Floor Area Band": "floor_area_band" + } + ), + how="left", + on=["starting_half_band", "floor_area_band"], +) +asset_list["partial_project_score"] = np.where( + asset_list["starting_half_band"].isin(["Low_C", "High_C"]), + None, + asset_list["partial_project_score"] +) + +asset_list["funding_abs"] = np.where( + asset_list["funding_scheme"] == "GBIS", + asset_list["partial_project_score"], + asset_list["Cost Savings"] +) + +asset_list["heat_loss_area"] = asset_list.apply( + lambda x: estimate_external_wall_area( + num_floors=x["attribute_est_number_floors"], + floor_height=( + float(x["epc_floor_height"]) if + not pd.isnull(x["epc_floor_height"]) else 2.5 + ), + perimeter=x["attribute_est_perimter"], + built_form=x["epc_archetype"] + ), + axis=1 +) + +filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/20250624 ACIS solid wall - standardised.xlsx" + +with pd.ExcelWriter(filename) as writer: + asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) diff --git a/etl/customers/blakeridge_mill/data.py b/etl/customers/blakeridge_mill/data.py new file mode 100644 index 00000000..c9d7f9e6 --- /dev/null +++ b/etl/customers/blakeridge_mill/data.py @@ -0,0 +1,49 @@ +# Get units for postcodes WF17 8RA, WF17 8RB +import os + +import pandas as pd +from epc_api.client import EpcClient +from dotenv import load_dotenv + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +postcodes = [ + "WF17 8RA", + "WF17 8RB", +] + +client = EpcClient(auth_token=EPC_AUTH_TOKEN) + +data = [] +for postcode in postcodes: + resp = client.domestic.search( + params={"postcode": postcode, "address": None, 
"local-authority": None, "property-type": None, + "floor-area": None, + "energy-band": None, "from-month": None, "from-year": None, "to-month": None, "to-year": None, + 'constituency': None}, + size=1000 + ) + data.extend(resp["rows"]) + +df = pd.DataFrame(data) +# Get newest field by UPRN, inspection-date +df["inspection-date"] = pd.to_datetime(df["inspection-date"]) +df = df.sort_values(by=["uprn", "inspection-date"], ascending=[True, False]) +df = df.drop_duplicates(subset=["uprn"], keep="first") + +df.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Blakeridge Mill/blakeridge_mill_epc_data.xlsx", index=False +) + +df = df[df["address"] != "The Tower Blakeridge Mill, Upper Blakeridge Lane"] +df["walls-description"].value_counts() +df["roof-description"].value_counts() + +df["total-floor-area"].astype(float).mean() +df["current-energy-efficiency"] = pd.to_numeric(df["current-energy-efficiency"], errors='coerce') + +df.groupby("transaction-type")["current-energy-efficiency"].mean() +df["transaction-type"].value_counts() + +df[df["transaction-type"] == "rental"]["built-form"].value_counts() diff --git a/etl/customers/bromford/solar_pv_cleanup.py b/etl/customers/bromford/solar_pv_cleanup.py new file mode 100644 index 00000000..c2c541da --- /dev/null +++ b/etl/customers/bromford/solar_pv_cleanup.py @@ -0,0 +1,289 @@ +import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc +import numpy as np + +contact_list = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar " + "PV address list - second wave KLD - PP.csv" +) +contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number( + address=str(x["Address 1: Street 1"]).strip(), + postcode=str(x["Postal Code"]).strip(), +), axis=1) + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - " + 
"Standardised (1).xlsx", + sheet_name="Standardised Asset List" +) + +lookup = [] +missed = [] +for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)): + + if x["Address 1: Street 1"] == '1 The Beck': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 40692, + } + ) + continue + + if x["Address 1: Street 1"] == '3 The Beck ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 40693, + } + ) + continue + + if x["Address 1: Street 1"] == '2 Orchard Close ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 7924, + } + ) + continue + + if x["Address 1: Street 1"] == '2 Orchard Close ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 7924, + } + ) + continue + + if x["Address 1: Street 1"] == '3 Croxall Road': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 40650, + } + ) + continue + + if x["Address 1: Street 1"] == '4 Ward Road ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 33175, + } + ) + continue + + df = asset_list[ + asset_list["domna_full_address"].str.replace(",", "").str.contains(x["Address 1: Street 1"].strip()) & + asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) + ] + + if df.shape[0] != 1: + df = asset_list[ + asset_list["domna_full_address"].str.replace(",", "") == x["Address 1: Street 1"].strip() & + asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) + ] + + if df.shape[0] != 1: + df = asset_list[ + (asset_list["domna_address_1"].astype(str) == str(x["house_no"])) & + (asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) == True) + ] + + if df.shape[0] != 1: + missed.append(x["UPRN"]) + continue + + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": df["landlord_property_id"].values[0], + } + ) + +lookup = pd.DataFrame(lookup) + +contact_list = contact_list.merge(lookup, how="left", on="UPRN") +# Store +contact_list.to_csv( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar " + "PV address list - second wave KLD - PP with landlord_property_id.csv", + index=False +) + +# I manually completed the lookup for the missed ones. We now read it back in and pull in the properties for the +# stndardised asset list +contacts_complete = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar " + "PV address list - second wave KLD - PP with landlord_property_id.csv" +) + +new_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet " + "Solar PV installs.xlsx", + sheet_name="Sheet1" +) + +contact_list = contact_list.merge( + new_data, + how="left", + left_on="UPRN", + right_on="CE UPRN" +) +route = asset_list[ + asset_list["landlord_property_id"].isin(contact_list["Legacy UPRN"].astype("Int64").astype(str)) +].copy() + +# Add the new heating data +contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str) +route2 = contact_list.merge( + route, + how="left", + right_on="landlord_property_id", + left_on="Legacy UPRN" +) + +# Because I did a data pull, we can fill the other bits of information +missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"].astype(int))] + +# Store both the route and missed +route2.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv", + index=False +) + +# Add on phone number +contact_details_filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme " + "Hubspot Upload/Hubspot/Bromford - Solar PV address list - second wave KLD - PP with " + "landlord_property_id.xlsx") + +contacts_filenames = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact " + "details/FAO 
Paul Contact Details-Table 1.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact " + "details/Green Contact Details-Table 1.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact " + "details/Main Contact Details-Table 1.csv", +] + +merge_to = pd.read_excel(contact_details_filepath) + +lookup = [] +for fn in contacts_filenames: + df = pd.read_csv(fn, encoding="utf-8-sig") + # Merge on phone + details = df[ + df["Property Reference Number (Main Address) (Property)"].isin(merge_to["UPRN"].astype(str)) + ][[ + "Property Reference Number (Main Address) (Property)", "Landline", "Mobile Phone", "Email Address", + "First Name", "Last Name" + ]] + + lookup.append(details) + +lookup = pd.concat(lookup) + +# Drop entries where landline, mobile and email are all NaN +lookup = lookup.dropna(subset=["Landline", "Mobile Phone", "Email Address"], how="all") +lookup = lookup.drop_duplicates(["Landline", "Mobile Phone", "Email Address"]) +# Sort so email is first, then landline, then mobile +lookup = lookup.sort_values( + ["Property Reference Number (Main Address) (Property)", "Email Address", "Landline", "Mobile Phone"], + ascending=[True, True, True, True] +) + +# Store +lookup.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/contact " + "details.csv", + index=False +) + +lookup2 = [] +for _, x in lookup.groupby("Property Reference Number (Main Address) (Property)"): + + # We any entries have an email, we take that + if x["Email Address"].notna().any(): + x = x[x["Email Address"].notna()] + # We then take the entry with a phone number + if x["Landline"].notna().any() or x["Mobile Phone"].notna().any(): + x = x[x["Landline"].notna() | x["Mobile Phone"].notna()] + + # Take the first entry + x = x.iloc[0] + lookup2.append(x) + +lookup2 = pd.DataFrame(lookup2) + +import pandas as pd + +# Sample structure 
based on your columns +columns = ['Property Reference Number (Main Address) (Property)', 'Landline', 'Mobile Phone', 'Email Address'] + +# Simulating example input DataFrame +# In practice, you would use: lookup = pd.read_csv(...) or similar +lookup = pd.DataFrame(columns=columns) + +# Grouping and transforming +results = [] + +for prop_id, group in lookup.groupby("Property Reference Number (Main Address) (Property)"): + # Filter rows with any contact information + filtered = group[ + group["Email Address"].notna() & + (group["Landline"].notna() | group["Mobile Phone"].notna()) + ] + + if filtered.empty: + continue + + # Sort by presence of phone numbers (prioritize those with both) + filtered["contact_score"] = ( + filtered["Landline"].notna().astype(int) + + filtered["Mobile Phone"].notna().astype(int) + ) + filtered = filtered.sort_values("contact_score", ascending=False) + + primary = filtered.iloc[0] + # Make sure secondary is not the same as primary + if not pd.isnull(primary["Mobile Phone"]): + secondary = filtered[ + (filtered["Mobile Phone"] != primary["Mobile Phone"]) + ] + elif not pd.isnull(primary["Landline"]): + secondary = filtered[ + (filtered["Landline"] != primary["Landline"]) + ] + else: + raise Exception("Look at me") + + secondary = filtered.iloc[1] if len(filtered) > 1 else None + + results.append({ + "Property ID": prop_id, + "Primary Email": primary["Email Address"], + "Primary Phone": primary["Mobile Phone"] or primary["Landline"], + "Secondary Email": secondary["Email Address"] if secondary is not None else None, + "Secondary Phone": secondary["Mobile Phone"] or secondary["Landline"] if secondary is not None else None, + }) + +final_df = pd.DataFrame(results) + +import ace_tools as tools; + +tools.display_dataframe_to_user(name="Cleaned Contact Lookup", dataframe=final_df) + +# We set up primary and secondary phone numbers. 
We use mobile as the primary + + +# We have duplicates, we prioritise entries, by ID, that have a email +lookup2 = lookup.sort_values("Property Reference Number (Main Address) (Property)").drop_duplicates( + "Property Reference Number (Main Address) (Property)", keep="last" +) + +# TODO: Get into the standardised asset list format +# TODO: Add the deal postcode to Hubspot +# TODO: Upload the deal postcode diff --git a/etl/customers/cambridge/surveys.py b/etl/customers/cambridge/surveys.py new file mode 100644 index 00000000..2aa52d6f --- /dev/null +++ b/etl/customers/cambridge/surveys.py @@ -0,0 +1,24 @@ +import pandas as pd +from backend.ml_models.Valuation import PropertyValuation +from backend.app.utils import sap_to_epc + +# Read in the survey data +surveys = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx", + sheet_name="Survey data", +) + +increases = [] +for _, x in surveys.iterrows(): + current_epc = sap_to_epc(x["Pre SAP"]) + target_epc = sap_to_epc(x["Scenario 1 Post SAP"]) + current_value = x["Valuation"] + + val = PropertyValuation.estimate_valuation_improvement( + current_value, + current_epc, + target_epc, + total_cost=None + ) + avg_increase = val["average_increase"] + increases.append(round(avg_increase)) diff --git a/etl/customers/ealing/fixing houses asset list.py b/etl/customers/ealing/fixing houses asset list.py new file mode 100644 index 00000000..4a39428a --- /dev/null +++ b/etl/customers/ealing/fixing houses asset list.py @@ -0,0 +1,45 @@ +import pandas as pd + +houses_list = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(UNCHECKED).csv" +) + +features = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(IGNORE - FULL MAIN).csv" +) +features = features.drop( + columns=[ + 'Archetype', 'Construction', 'Insulated', 'Material', + 'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES', + 'OFF GAS - ROOF 
ORIENTATION', 'Any further surveyor notes', 'Surveyors Name', + 'Unnamed: 30', 'Unnamed: 31' + ] +) + +demolitions = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing - Demolished or due to be.xlsx", + sheet_name="Demolished or due to be" +) + +inspections_data = houses_list[ + [ + "Property ref", "Postcode", 'Archetype', 'Construction', 'Insulated', 'Material', + 'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES', + 'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'YET TO BE SURVEYED' + ] +].rename(columns={"YET TO BE SURVEYED": "Surveyors Name"}) + +asset_list = features.drop( + columns=[ + 'Archetype', 'Construction', 'Insulated', 'Material', 'CIGA Check Required', + 'PV, ACCESS ISSUE, SEE NOTES', 'OFF GAS - ROOF ORIENTATION', + 'Any further surveyor notes', 'Surveyors Name', "Postcode" + ] +).merge( + inspections_data, + how="inner", + on="Property ref", +) + +asset_list.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing_rechecked_cleaned_05042025.csv", + index=False) diff --git a/etl/customers/ealing/prepare_for_hubspot.py b/etl/customers/ealing/prepare_for_hubspot.py new file mode 100644 index 00000000..8cffda57 --- /dev/null +++ b/etl/customers/ealing/prepare_for_hubspot.py @@ -0,0 +1,75 @@ +import numpy as np +import pandas as pd +from asset_list.hubspot.config import HubspotProcessStatus + +project_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/Ealing Flats Completion Tracker JW " + "170625.xlsx", + sheet_name="All_Flats" +) + +project_data["hubspot_status"] = None +project_data["hubspot_status"] = np.where( + (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2023"), + HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + project_data["hubspot_status"] +) +project_data["hubspot_status"] = np.where( + (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2019"), + "SURVEYED UNDER 2019 - NEEDS 
RE-SURVEY", + project_data["hubspot_status"] +) +project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str) + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Standardised Asset List" +) +asset_list["landlord_property_id"] = asset_list["landlord_property_id"].astype(str) +asset_list["incorrect_landlord_property_id"] = asset_list["incorrect_landlord_property_id"].astype(str) +project_data["Property ref"] = project_data["Property ref"].astype(str) + +# We need to update the status of properties that already been surveyed +asset_list2 = asset_list.merge( + project_data[["Property ref", "hubspot_status", "project_code"]], + how="left", + right_on="Property ref", + left_on="incorrect_landlord_property_id", + suffixes=("", "_project") +) +asset_list2["hubspot_status"] = np.where( + ~pd.isna(asset_list2["hubspot_status_project"]), + asset_list2["hubspot_status_project"], + asset_list2["hubspot_status"] +) +asset_list2["project_code"] = np.where( + ~pd.isna(asset_list2["project_code"]), + asset_list2["project_code"], + asset_list2["landlord_property_id"] +) + +asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"]) +asset_list2["cavity_reason"] = np.where( + pd.isnull(asset_list2["cavity_reason"]), + "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68", + asset_list2["cavity_reason"] +) +asset_list2["solar_reason"] = None + +# Read in block analysis and geographical areas from standardised asset list +block_analysis_df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Block Analysis" +) +geographical_areas = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Geographical Areas" +) + +# Update the new 
standardised asset list +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared " + "programme.xlsx") +with pd.ExcelWriter(filename) as writer: + asset_list2.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) + geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) diff --git a/etl/customers/l_and_g/risk_matrix.py b/etl/customers/l_and_g/risk_matrix.py index c800117e..8f5451fc 100644 --- a/etl/customers/l_and_g/risk_matrix.py +++ b/etl/customers/l_and_g/risk_matrix.py @@ -81,6 +81,7 @@ def app(): # We need to calculate the costs cost_data = [] for _, row in epr_data.iterrows(): + epc = row["EPC"][0] sap = int(row["EPC"][1:]) diff --git a/etl/customers/mhs/new_programme.py b/etl/customers/mhs/new_programme.py new file mode 100644 index 00000000..6f1caafe --- /dev/null +++ b/etl/customers/mhs/new_programme.py @@ -0,0 +1,116 @@ +# +import pandas as pd + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="Standardised Asset List" +) + +new_cavity_programme = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="New Cavity Programme" +) + +new_cavity_pilot = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="Empty Cavity Pilot" +) + +new_solar_programme = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="New Solar Programme" +) + +in_fill_properties_houses = pd.read_excel( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 (" + "1).xlsx", + sheet_name="Houses and Bungalows" +) +in_fill_properties_flats = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 (" + "1).xlsx", + sheet_name="Flats and Maistonettes" +) +# Q1) What are these properties? Do we have them on our list already? +# All of the houses are already in the asset list +in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin( + asset_list["landlord_property_id"].values +) +# All of the flats are already in the asset list +in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin( + asset_list["landlord_property_id"].values +) + +# Q2) Which properties are excluded from the new programme? +in_fill_properties = pd.concat( + [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False +) + +# Merge on the data +in_fill_properties = in_fill_properties.merge( + asset_list, + left_on="UPRN", + right_on="landlord_property_id", + how="left" +) +# How many properties are in the new programme? + +in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin( + new_cavity_programme["landlord_property_id"].values +) +in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin( + new_solar_programme["landlord_property_id"].values +) +in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin( + new_cavity_pilot["landlord_property_id"].values +) +not_in_new_programme = in_fill_properties[ + (~in_fill_properties["in_new_cavity_programme"] & ~in_fill_properties["in_new_solar_programme"] & ~ + in_fill_properties["in_new_cavity_pilot"]) +].copy() + +# Why? 
+not_in_new_programme["cavity_reason"].value_counts() +not_in_new_programme["solar_reason"].value_counts() + +not_identified_for_anything = not_in_new_programme[ + pd.isnull(not_in_new_programme["cavity_reason"]) & + pd.isnull(not_in_new_programme["solar_reason"]) + ] + +# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding +# the extraction +not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin( + [ + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75", + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68", + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more", + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less", + "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more", + "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less", + "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less", + "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more", + ] +) + +not_in_new_programme["excluded"] = not_identified_for_anything["landlord_property_id"].isin( + not_identified_for_anything["landlord_property_id"].values +) + +not_in_new_programme[ + not_in_new_programme["funded_extractions"] +].to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv", + index=False +) + +not_in_new_programme[ + not_in_new_programme["excluded"] == True + ].to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv", + index=False +) diff --git a/etl/customers/ncha/portfolio.py b/etl/customers/ncha/portfolio.py new file mode 100644 index 00000000..f47c87c8 --- /dev/null +++ b/etl/customers/ncha/portfolio.py @@ -0,0 +1,14 @@ +import pandas as pd + +cavity = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - 
Standardised.xlsx", + sheet_name="Cavity Properties (for review)", +) +solar = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx", + sheet_name="Solar Properties", +) + +cavity_al = cavity[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} +) diff --git a/etl/customers/places_for_people/abs.py b/etl/customers/places_for_people/abs.py new file mode 100644 index 00000000..aa85a93f --- /dev/null +++ b/etl/customers/places_for_people/abs.py @@ -0,0 +1,199 @@ +""" +This script is to calculate the ABS for the Places for People London project +""" + +import os +import pandas as pd + +# London +pfp_london_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_london_cav = pfp_london_cav.rename(columns={"Route": "Route March"}) +pfp_london_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_london_pv = pfp_london_pv.rename(columns={"Route": "Route March"}) +pfp_london_cav["location"] = "London" +pfp_london_pv["location"] = "London" +# East +pfp_east_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_east_reviewed_standarised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_east_cav = pfp_east_cav.rename(columns={"Route": "Route March"}) +pfp_east_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_east_reviewed_standarised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_east_pv = pfp_east_pv.rename(columns={"Route": "Route 
March"}) +pfp_east_cav["location"] = "East" +pfp_east_pv["location"] = "East" +# North east +pfp_north_east_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_east_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_north_east_cav = pfp_north_east_cav.rename(columns={"Route": "Route March"}) +pfp_north_east_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_east_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_north_east_pv = pfp_north_east_pv.rename(columns={"Route": "Route March"}) +pfp_north_east_cav["location"] = "North East" +pfp_north_east_pv["location"] = "North East" +# North West +pfp_north_west_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_west_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_north_west_cav = pfp_north_west_cav.rename(columns={"Route": "Route March"}) +pfp_north_west_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_west_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_north_west_pv = pfp_north_west_pv.rename(columns={"Route": "Route March"}) +pfp_north_west_cav["location"] = "North West" +pfp_north_west_pv["location"] = "North West" + +cav_route = pd.concat( + [ + pfp_london_cav, + pfp_east_cav, + pfp_north_east_cav, + pfp_north_west_cav + ] +) +solar_route = pd.concat( + [ + pfp_london_pv, + pfp_east_pv, + pfp_north_east_pv, + pfp_north_west_pv + ] +) + + +def get_band(sap_score_number): + bands = [ + ("High_A", 96, float("inf")), + ("Low_A", 92, 96), + ("High_B", 86, 92), + ("Low_B", 81, 86), + ("High_C", 74.5, 81), + ("Low_C", 69, 74.5), + ("High_D", 61.5, 69), + ("Low_D", 55, 61.5), + ("High_E", 46.5, 55), + ("Low_E", 39, 
46.5), + ("High_F", 29.5, 39), + ("Low_F", 21, 29.5), + ("High_G", 10.5, 21), + ("Low_G", 1, 10.5), + ] + + for band, lower, upper in bands: + if lower <= sap_score_number < upper: + return band + + return None + + +def classify_floor_area(floor_area): + if floor_area <= 72: + return "0-72" + + if floor_area <= 97: + return "73-97" + + if floor_area <= 199: + return "98-199" + + return "200+" + + +# We classify the abs bounds +solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band) +solar_route["ending_abs_band_scenario1"] = "High_C" +solar_route["ending_abs_band_scenario2"] = "Low_B" +solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(90) +solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area) + +# We classify the abs bounds +cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68) +cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band) +cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area) +cav_route["ending_abs_band"] = "Low_C" + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) + +cav_route = cav_route.merge( + abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}), + how="left", + left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"], + right_on=["Starting Band", "Finishing Band", "Floor Area Segment"], +) +solar_route = solar_route.merge( + abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}), + how="left", + left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"], + right_on=["Starting Band", "Finishing Band", "Floor Area Segment"], +) +cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0) +solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0) + +cav_abs_agg = ( + cav_route.groupby("Route March").agg( + { + "ABS Rate": "sum", + 
"landlord_property_id": "count", + } + ).reset_index() +) +cav_abs_agg["Week Number"] = cav_abs_agg["Route March"].str.extract(r"(\d+)").astype(int) +cav_abs_agg = cav_abs_agg.sort_values("Week Number", ascending=True) +cav_abs_agg = cav_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"}) + +solar_abs_agg = ( + solar_route.groupby("Route March").agg( + { + "ABS Rate": "sum", + "landlord_property_id": "count", + } + ).reset_index() +) +solar_abs_agg["Week Number"] = solar_abs_agg["Route March"].str.extract(r"(\d+)").astype(int) +solar_abs_agg = solar_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"}) +solar_abs_agg = solar_abs_agg.sort_values("Week Number", ascending=True) + +# We store the data +# Store as an excel +filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx" +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + +with pd.ExcelWriter(filename) as writer: + solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False) + cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False) + + cav_route.to_excel(writer, sheet_name="Cavity data", index=False) + solar_route.to_excel(writer, sheet_name="Solar data", index=False) diff --git a/etl/customers/plus dane/prepare_asset_list.py b/etl/customers/plus dane/prepare_asset_list.py new file mode 100644 index 00000000..430c7b5a --- /dev/null +++ b/etl/customers/plus dane/prepare_asset_list.py @@ -0,0 +1,48 @@ +""" +July 2025, this script prepares the asset list for Plus Dane +""" +import pandas as pd + +oldest_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/PLUS DANE Asset List.xlsx" +) +solar_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane - potential " + "PV List 04.03.2025.xlsx" +) 
+newest_asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Sava Intelligent Energy "
+    "- Property List - March 2025.xlsx"
+)
+
+old_missed = oldest_asset_list[~oldest_asset_list["UPRN"].isin(newest_asset_list["UPRN"])].copy()  # copy: tagged below
+solar_missed = solar_asset_list[~solar_asset_list["UPRN"].isin(newest_asset_list["UPRN"])]  # Empty
+
+# Build the new asset list: the newest list is the base, left-joined on UPRN with the solar and oldest lists.
+# Columns on the NEWEST list:
+# 'UPRN', 'Address', 'Postcode', 'Town', 'EPC SAP Band', 'SAP Rating',
+# 'CO₂ Emissions', 'EPC EI Band', 'Data Quality Indicator',
+# 'Results Calculated', 'Property Age', 'Property Type', 'Built Form',
+# 'Wall Construction', 'Wall Insulation', 'Roof Construction',
+# 'Joist Insulation', 'Space Heating System', 'Space Heating Fuel'
+#
+# Clashing column names from the SOLAR and oldest lists receive the "_solar" / "_old" suffixes.
+
+df = newest_asset_list.merge(
+    solar_asset_list, how="left", on="UPRN", suffixes=("", "_solar"),
+).merge(
+    oldest_asset_list, how="left", on="UPRN", suffixes=("", "_old")
+)
+df["asset_list_versiion"] = "July 2025"  # NOTE(review): "versiion" is misspelt; kept as it is the output column name
+old_missed["asset_list_versiion"] = "Historic"
+
+# Append the rows found only in the oldest list (old_missed) onto the merged list
+df = pd.concat( + [df, old_missed], ignore_index=True, sort=False +) +# Store excel +df.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane Asset List " + "July 2025.xlsx", + index=False, +) diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index a8805a71..df4a16fe 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData -PORTFOLIO_ID = 141 +PORTFOLIO_ID = 212 USER_ID = 8 load_dotenv(dotenv_path="backend/.env") @@ -17,25 +17,15 @@ def app(): :return: """ - asset_list = [ - { - "address": "196 Merrow Street", - "postcode": "SE17 2NP", - "uprn": 200003423454, - "patch": True - }, - { - "address": "65 Liverpool Grove", - "postcode": "SE17 2HP", - "uprn": 200003423194 - }, - { - "address": "2 Brettell Street", - "postcode": "SE17 2NZ", - "uprn": 200003423607 - }, - ] - asset_list = pd.DataFrame(asset_list) + asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx", + sheet_name="Solar Properties", + ) + asset_list = asset_list[~asset_list["estimated"]] + asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str) + asset_list = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} + ) # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" @@ -98,14 +88,15 @@ def app(): "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", "goal": "Increasing EPC", - "goal_value": "C", + "goal_value": "A", "trigger_file_path": filename, "already_installed_file_path": "", "patches_file_path": patches_filename, "non_invasive_recommendations_file_path": 
non_invasive_recommendations_filename, - "valuation_file_path": valuation_filename, + "valuation_file_path": "", "scenario_name": "Full package remote assessment", "multi_plan": True, "budget": None, + "inclusions": ["cavity_wall_insulation", "ventilation"] } print(body) diff --git a/etl/customers/thrive/Make Insepctions route.py b/etl/customers/thrive/Make Insepctions route.py new file mode 100644 index 00000000..ec4f620b --- /dev/null +++ b/etl/customers/thrive/Make Insepctions route.py @@ -0,0 +1,40 @@ +""" +This script will pull in properties, in neighbouring areas, that have been flagged for CWI +""" +import pandas as pd + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="Standardised Asset List" +) + +cavity_areas = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="Cavity Areas" +) + +existing_inspections_sheet = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="July 2025 Inspections" +) + +empties = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="Cavity properties - for review" +) + +cavity_inspections = asset_list[ + asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values) +] +cavity_inspections = cavity_inspections[ + ~cavity_inspections["landlord_property_id"].isin(empties["landlord_property_id"].values) +] + +cavity_inspections.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv", + index=False +) diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py index 521cfd30..2d6a0d69 100644 --- 
a/etl/customers/thrive/Programme Analysis.py +++ b/etl/customers/thrive/Programme Analysis.py @@ -8,6 +8,8 @@ address the following concerns: """ import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc # This is Thrive's list of properties and when they should have been surveyed thrive_tracker = pd.read_excel( @@ -51,27 +53,10 @@ original_columns = { } original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns) -original_asset_list["Data Source"] = "Thrive Tracker" +original_asset_list["Data Source"] = "Original Asset List" +original_asset_list = original_asset_list.drop_duplicates() # We append on the missed properties, with the information we have -# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#', -# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form', -# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number', -# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number', -# 'Special Requirements ', 'CIGA', 'Date CIGA check received', -# 'Proposed Progamme', 'New Proposed Programme', -# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type', -# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation', -# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition', -# 'Date Submitted to installer', 'PRRN Number', -# 'Loft insulation required? 
(Thrive)', 'Date booked ', -# 'Completed\n(yes/no)', 'Date Completed', -# 'Vents installed?\n(number and location)', -# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ', -# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added', -# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated', -# 'PRRN Submitted ' - missed_properties["Full Address"] = ( missed_properties["#"].astype(str) + ", " + missed_properties["Adress Line 1"].astype(str) + ", " + @@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected" missed_properties["ECO Eligibility"] = "Property Not Inspected" missed_properties["Data Source"] = "Thrive Tracker" +# We de-dupe ides in original_asset_list +dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique() +dupes = original_asset_list[ + original_asset_list["thrive_property_id"].isin(dupe_ids) +].copy() +dupes = dupes.sort_values("thrive_property_id") + +original_asset_list = original_asset_list.rename( + columns={ + "detailed_property_type": "build_form" + } +) + master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True) # We were provided with a data update for a sample of properties. 
We update the data with this information @@ -103,12 +101,339 @@ data_update = pd.read_excel( header=0 ) -new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)] +new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy() +new_properties["Full Address"] = ( + new_properties["#"].astype(str) + ", " + + new_properties["Adress Line 1"].astype(str) + ", " + + new_properties["Postcode"].astype(str) +) +new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns) +new_properties["WFT Findings"] = "Property Not Inspected" +new_properties["ECO Eligibility"] = "Property Not Inspected" +new_properties["Data Source"] = "13.05.2025 Data Update" + +master_list = pd.concat([new_properties, master_list]) + +# We append any new data on heating system, heating type, and insulation type, based on the data update +master_list = master_list.merge( + data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename( + columns={ + "Heating Type": "heating_type_updated", + "Assumed mm ": "assumed_loft_insulation_thickness_updated", + "SAP": "sap_rating_updated" + } + ), + how="left", + left_on="thrive_property_id", + right_on="UPRN" +) + +# We fill the missings +master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"]) +master_list["assumed_loft_insulation_thickness_updated"] = master_list[ + "assumed_loft_insulation_thickness_updated" +].fillna(master_list["assumed_loft_insulation_thickness"]) +master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"]) + +assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list" + +master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin( + thrive_tracker["UPRN"].astype(str).values +) + +# Those the asset list - call it master asset 
list updated May2025 +master_list = master_list.drop(columns=["UPRN"]) +master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str) +# master_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " +# "Complete - Updated May 2025.xlsx", +# ) + +master_list["house_number_TEMP"] = master_list.apply( + lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]), + axis=1 +) + +# We add in the status of the property +# TODO: Add the status of the property from the Thrive tracker +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April " + "24-March25.xlsx", + header=0 +) +outcomes["row_id"] = outcomes.index + +# We have two ids which have the same phohe. nymber, but different UPRN, so we don't match to the tracker for these +tracker_for_matching = thrive_tracker[ + ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1']) +].copy() +tracker_for_matching["Full Address"] = ( + tracker_for_matching["#"].astype(str) + ", " + + tracker_for_matching["Adress Line 1"].astype(str) + ", " + + tracker_for_matching["Postcode"].astype(str) +) + +outcomes_id_lookup = [] +for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)): + + hn = str(x["No."]) + address = x["Address"] + postcode = x["Postcode"] + contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"]) + contact_no = None if contact_no == "nan" else contact_no + + if address == "292 Micklefield Road": + hn = "292" + + if (address == "Micklefield Road") & (hn == "302"): + hn = "292" + + if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "103a" + + if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "105a" + + if (address == "107a Norfolk Road Rickmansworth Hertfordshire 
WD3 1JY"): + hn = "107a" + + # + # # We match this to the tracker + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no] + # # Many of the phone numbers don't have a leading zero in the tracker so we add them + # if (m1.shape[0] != 1) and not pd.isnull(contact_no): + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")] + # + # if m1.shape[0] > 1: + # raise ValueError( + # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + # ) + + # if m1.empty: + m1 = tracker_for_matching[ + (tracker_for_matching["#"].astype(str) == hn) & + (tracker_for_matching["Postcode"] == postcode) + ] + + if m1.empty: + # Some properties aren't in the tracker, we match to the master list + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == postcode) + ] + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "address": m1["full_address"].values[0], + "postcode": m1["postcode"].values[0], + } + ) + continue + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + ) + + # We add the status to the master list + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["UPRN"].values[0], + "address": m1["Full Address"].values[0], + "postcode": m1["Postcode"].values[0], + } + ) + +outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup) +outcomes = outcomes.merge( + outcomes_id_lookup, + how="left", + left_on="row_id", + right_on="row_id" +) + +outcomes = outcomes.drop(columns=["row_id"]) +outcomes = outcomes.rename( + columns={ + "Outcomes": "Outcome", + "Notes (If 'no " + "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes", + } +) +# Store the corrected outcomes +# outcomes.to_excel( +# 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - +# April 24-March25 - Corrected.xlsx", +# index=False +# ) -data_update = = data_update[["UPRN", ""]] -# TODO: Flag the Thrive priorities and create a separate project code for these -# TODO: Add the general project code -# TODO: Add the thrive \ No newline at end of file +def parse_date(value): + # Strip any 'W.C' or 'w/c' prefix and clean whitespace + value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip() + try: + # Try parsing the date with dayfirst=True + return pd.to_datetime(value, dayfirst=True, errors='coerce') + except Exception: + return pd.NaT + + +outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date) + +# Next step - match the submissions master to the asset list. We will append on the UPRN +eco3_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3.csv", + header=0 +) +eco3_submissions["row_id"] = eco3_submissions.index + +eco4_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4.csv", + header=0 +) +eco4_submissions["row_id"] = eco4_submissions.index + +# List of properties never on the asset list +not_on_master = [ + "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL", + "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA", + "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN", + "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN", + "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN", + "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN", + 
"25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN", + "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN", + '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN', + '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN', + '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN', + '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN' +] + +eco3_remap = { + "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'), + "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'), + "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'), + "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'), + "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'), + "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'), + "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'), + "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'), + "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'), + "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'), + '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'), + '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'), + '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'), + '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'), + '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'), + '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'), + '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill 
gardens', 'WD5 0HF'), + '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'), + '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'), + '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'), + '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'), + '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'), + '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'), +} + +eco3_lookup = [] +for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)): + hn = row["NO "] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == pc) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + eco3_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO "], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +eco4_lookup = [] +for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)): + hn = row["NO."] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"].str.lower() == pc.lower()) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + 
eco4_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO."], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +# We match the lookups back to the submission sheets +eco3_lookup = pd.DataFrame(eco3_lookup) +eco3_submissions = eco3_submissions.merge( + eco3_lookup, + how="left", + on="row_id", +) + +eco4_lookup = pd.DataFrame(eco4_lookup) +eco4_submissions = eco4_submissions.merge( + eco4_lookup, + how="left", + on="row_id", +) + +# Store +eco3_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3 - with IDS.csv", + index=False +) +eco4_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4 - with IDS.csv", + index=False +) diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py new file mode 100644 index 00000000..01a15497 --- /dev/null +++ b/etl/customers/thrive/Project codes.py @@ -0,0 +1,130 @@ +""" +THis script will take the standardised asset list and append on the project codes. 
+We also, review the existing install status, in case anything is wrong +""" +import pandas as pd +import numpy as np + +standardised_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +project_code_allocations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +programme_codes = project_code_allocations[ + ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ] +].copy() +programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy() +programme_codes["programme_reference"] = np.where( + pd.isnull(programme_codes["programme_reference"]), + programme_codes["Proposed Progamme"], + programme_codes["programme_reference"] +) + +PROJECT_CODE_MAP = { + 'Phase 2': "THRIVE-002", + 'Phase 3': "THRIVE-003", + 'Phase 4': "THRIVE-004", + 'Phase 5': "THRIVE-005", + 'Phase 6': "THRIVE-006", + 'Phase 7': "THRIVE-007", + 'Phase 8': "THRIVE-008", + 'Phase 9': "THRIVE-009", + 'Phase 10': "THRIVE-010", + "Week 1": "THRIVE-WEEK-001", + "Week 2": "THRIVE-WEEK-002", + "Week 4": "THRIVE-WEEK-004", + "Week 7": "THRIVE-WEEK-007", +} +programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP) + +thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy() + +standardised_asset_list = standardised_asset_list.merge( + programme_codes[["UPRN", "project_code", "programme_reference"]], + how="left", + left_on="landlord_property_id", + right_on="UPRN", +).merge( + thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]], + how="left", + on="UPRN", +) + +standardised_asset_list = 
standardised_asset_list.drop(columns=["UPRN"]) + +# We fill the project code for historical completions +standardised_asset_list["project_code"] = np.where( + pd.isnull(standardised_asset_list["project_code"]) & ( + standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED" + ) & ( + ~pd.isnull(standardised_asset_list["hubspot_status"]) + ), + "THRIVE-HISTORICAL", + standardised_asset_list["project_code"] +) + +# Store as an excel +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - " + "reconciled.xlsx") +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data +# Other tabs: +block_analysis = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Block Analysis", +) +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Outcomes", +) +unmatched_submissions = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Submissions", +) +unmatched_ecosurv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Ecosurv", +) + +with pd.ExcelWriter(filename) as writer: + standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False) + # If we have outcomes, we add a tab with the outcomes + outcomes.to_excel(writer, sheet_name="Outcomes", index=False) + 
unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) + unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) + +# A check, just comparing against the master tracker to make sure I have all of the installs +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +master_tracker = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +df = asset_list[["landlord_property_id", "hubspot_status"]].merge( + master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]], + how="inner", + left_on="landlord_property_id", + right_on="UPRN" +) + +df["hubspot_status"].value_counts() +df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"] diff --git a/etl/find_my_epc/AssetListEpcData.py b/etl/find_my_epc/AssetListEpcData.py index f085c8fb..2ff9a3e0 100644 --- a/etl/find_my_epc/AssetListEpcData.py +++ b/etl/find_my_epc/AssetListEpcData.py @@ -1,3 +1,4 @@ +import random import time import pandas as pd from tqdm import tqdm @@ -27,6 +28,7 @@ class AssetListEpcData: self.extracted_data = None self.non_invasive_recommendations = None self.patches = None + self.epc_data = None @staticmethod def check_asset_list(asset_list): @@ -49,7 +51,7 @@ class AssetListEpcData: "uprn": r.get("uprn"), "address": r["address"], "postcode": r["postcode"], - "recommendations": r["recommendations"] + "recommendations": r.get("recommendations") } for r in self.extracted_data ] @@ -74,7 +76,9 @@ class AssetListEpcData: # Pull the additional data extracted_data = [] + epc_data = [] for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)): + add1 = home["address"] pc = 
home["postcode"] # Retrieve the EPC data @@ -92,9 +96,6 @@ class AssetListEpcData: if epc_searcher.newest_epc is None: continue - if not pd.isnull(home.get("patch")): - epc_searcher.newest_epc["address1"] = add1 - # Attempt both methods: try: find_epc_searcher = RetrieveFindMyEpc( @@ -104,16 +105,37 @@ class AssetListEpcData: find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") - find_epc_searcher = RetrieveFindMyEpc( - address=epc_searcher.newest_epc["address1"], - postcode=epc_searcher.newest_epc["postcode"] - ) - find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() - time.sleep(0.5) + if not pd.isnull(home.get("patch")): + epc_searcher.newest_epc["address1"] = add1 + + try: + find_epc_searcher = RetrieveFindMyEpc( + address=epc_searcher.newest_epc["address1"], + postcode=epc_searcher.newest_epc["postcode"] + ) + find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() + except Exception as e: + logger.error("Error retrieving find my epc data with alternative address format: {e}") + find_epc_data = { + "current_epc_rating": epc_searcher.newest_epc["current-energy-rating"], + "current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"], + "potential_epc_rating": None, + "potential_epc_efficiency": None, + "epc_data": {} + } + + # Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit + time.sleep(random.sample(range(50, 100), 1)[0] / 100) + + # Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit + if len(extracted_data) % 50 == 0 and len(extracted_data) > 0: + logger.info("Sleeping for 10 seconds to avoid hitting API rate limit") + time.sleep(10) + # We need uprn to_append = { - "uprn": home.get("uprn"), + "uprn": home.get("uprn", epc_searcher.newest_epc["uprn"]), "address": home["address"], "postcode": home["postcode"], **find_epc_data, @@ -128,6 +150,8 @@ 
def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
    """
    Parse one EPC certificate page into a flat dictionary of results.

    As a side effect, ``self.walls`` is reset and filled with the text of every "Wall" row found in the
    property features table.

    :param soup: parsed certificate page; only ``find`` / ``find_all`` / ``find_next`` style lookups are used
    :param epc_certificate: certificate identifier, copied unchanged into the result
    :param sap_2012_date: forwarded unchanged to ``self.format_recommendations``
    :return: dict holding the certificate id, current/potential rating and efficiency, the heating and hot
        water bill text (``None`` when the page does not provide them), the formatted recommendations and
        the ``epc_data`` extract, with the assessment details and low/zero carbon sources merged in at the
        top level
    :raises ValueError: if any of the expected assessment keys cannot be found on the page
    """

    ratings = soup.find('desc', {'id': 'svg-desc'}).text
    current_rating = ratings.split(".")[0]
    potential_rating = ratings.split(".")[1]
    # NOTE(review): assumes each sentence ends "<letter> with a score of <number>", so the letter is the
    # sixth token from the end and the score is the last one - confirm against a live page
    current_sap = int(current_rating.split(' ')[-1])
    current_epc = current_rating.split(' ')[-6]

    # Retrieve the energy consumption.
    # Very old EPCs usually do not carry this information, so a missing section, or a section with fewer
    # than two entries, yields None for the missing text rather than an exception
    bills = soup.find('div', {'id': 'bills-affected'})
    bills_list = bills.find_all('li') if bills is not None else []
    heating_text = bills_list[0].text if len(bills_list) > 0 else None
    hot_water_text = bills_list[1].text if len(bills_list) > 1 else None

    # Retrieve the recommendations and SAP points
    recommendations = []
    recommendations_div = soup.find('div', class_='epb-recommended-improvements')
    if recommendations_div:
        # Each h3 header is one step; the gain of a step is measured against the previous step's rating
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        previous_sap_score = current_sap
        previous_epc = current_epc
        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")

            # Find the div containing the potential rating within the same section
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )

            # Steps without a potential rating div are skipped entirely
            if potential_rating_div:
                # Extract the rating text within the SVG text element
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
                if extracted_rating_text is not None:
                    rating_text = extracted_rating_text.text.strip()
                else:
                    # No rating shown for this step: carry the previous rating forward (zero SAP gain)
                    rating_text = " ".join([str(previous_sap_score), previous_epc])
                # Parse the rating text to separate the numeric rating and EPC letter
                new_rating = int(rating_text.split()[0])
                new_epc = rating_text.split()[1]

                recommendations.append({
                    "step": step_num,
                    "measure": measure_title,
                    "new_rating": new_rating,
                    "new_epc": new_epc,
                    "sap_points": new_rating - previous_sap_score
                })
                previous_sap_score = new_rating
                previous_epc = new_epc

    # Search for the assessment information and parse it row by row
    assessment_information = soup.find('div', {'id': 'information'})
    rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
    assessment_data = {}
    for row in rows:
        key = row.find('dt').text.strip()
        if key == "Type of assessment":
            # We don't reliably extract this
            continue
        value_tag = row.find('dd')

        # Prefer the link text (e.g. an email), then the span of an expandable summary, then the raw text
        if value_tag.find('a'):
            value = value_tag.find('a').text.strip()
        elif value_tag.find('summary'):
            value = value_tag.find('span').text.strip()
        else:
            value = value_tag.text.strip()

        # "Telephone" and "Email" appear for both the surveyor and the accreditation scheme. The first
        # occurrence is stored as the assessor's, any later one as the accreditation scheme's
        if key in ["Telephone", "Email"]:
            if "Assessor's " + key not in assessment_data:
                assessment_data["Assessor's " + key] = value
            else:
                assessment_data["Accreditation Scheme's " + key] = value
            continue

        assessment_data[key] = value

    expected_keys = [
        'Assessor’s name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessor’s ID',
        'Accreditation scheme',
        'Assessor’s declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate'
    ]
    # Check we have all the expected keys
    for key in expected_keys:
        if key not in assessment_data:
            raise ValueError(f"Missing key: {key}")

    # The wall types of the property
    property_features_table = soup.find("tbody", class_="govuk-table__body")
    property_features_table = property_features_table.find_all("tr")

    self.walls = []
    for row in property_features_table:
        header = row.find("th")
        # Rows without a header cell cannot be a "Wall" row, so they are ignored
        if header is not None and header.text.strip() == "Wall":
            self.walls.append(row.find_all("td")[0].text.strip())

    # Finally, we format the recommendations
    recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)

    # Low and zero carbon energy sources
    low_carbon_energy_sources = self.extract_low_carbon_sources(soup)

    # Pull out the EPC data
    epc_data = self.extract_epc_data(soup)

    resulting_data = {
        'epc_certificate': epc_certificate,
        'current_epc_rating': current_epc,
        'current_epc_efficiency': current_sap,
        'potential_epc_rating': potential_rating.split(' ')[-6],
        "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
        "heating_text": heating_text,
        "hot_water_text": hot_water_text,
        "recommendations": recommendations,
        "epc_data": epc_data,
        **assessment_data,
        **low_carbon_energy_sources,
    }

    return resulting_data
other_cert_links: + cert_url = f"https://find-energy-certificate.service.gov.uk{link['href'].strip()}" + response = requests.get(cert_url, headers=self.HEADERS) + time.sleep(0.3) + soup_list.append(BeautifulSoup(response.text, features="html.parser")) + + all_find_my_epc_data = [] + for soup in soup_list: + # Start with the primary one + all_find_my_epc_data.append(self._extract_epc_from_soup(soup, epc_certificate, sap_2012_date)) + + return all_find_my_epc_data + + def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): + """ + For a post code and address, we pull out all the required data from the find my epc website + """ + + postcode_input = self.postcode.replace(" ", "+") + postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) + postcode_response = requests.get(postcode_search, headers=self.HEADERS) + + postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") + rows = postcode_res.find_all('tr', class_='govuk-table__row') + + extracted_table = [] + for row in rows: + # Extract the address and URL + address_tag = row.find('a', class_='govuk-link') + if address_tag is None: + continue + extracted_address = None + extracted_address_url = None + if address_tag: + extracted_address = address_tag.text.strip() + extracted_address_url = address_tag['href'] + + extracted_address_cleaned = ( + extracted_address.replace(",", "").replace(" ", "").lower() + ) + if not extracted_address_cleaned.startswith(self.address_cleaned): + continue + + # If the address is a match, we can extract the data + + # Extract the expiry date + expiry_date_tag = row.find('td', class_='govuk-table__cell date') + expiry_date = None + if expiry_date_tag is not None: + expiry_date = expiry_date_tag.parent.find('span').text.strip() + + extracted_table.append( + { + "extracted_address": extracted_address, + "extracted_address_url": extracted_address_url, + "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'), + } + ) + + if not 
extracted_table: + raise ValueError("No EPC found") + + if len(extracted_table) > 1: + # We take the one with the most recent expiry date + extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) + + chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] + epc_certificate = chosen_epc.split('/')[-1] + + address_response = requests.get(chosen_epc, headers=self.HEADERS) + address_res = BeautifulSoup(address_response.text, features="html.parser") + # Key data we want to retrieve: # 1) Rating # 2) Bills estimates @@ -195,9 +437,6 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) - # Floor area - address_res.find() - # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) bills_list = bills.find_all('li') @@ -432,6 +671,13 @@ class RetrieveFindMyEpc: "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"], "Heating controls (programmer and thermostatic radiator valves)": [ "roomstat_programmer_trvs", "time_temperature_zone_control" + ], + 'Heating controls (programmer room thermostat and thermostatic radiator valves)': [ + "roomstat_programmer_trvs", "time_temperature_zone_control" + ], + "Internal wall insulation": ["internal_wall_insulation"], + "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [ + "high_heat_retention_storage_heater" ] } @@ -466,8 +712,13 @@ class RetrieveFindMyEpc: find_epc_data = searcher.retrieve_newest_find_my_epc_data() except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") + if epc["address1"] == epc["address"]: + # There's no benefit of using the same address, so we split on comma + address1 = epc["address"].split(",")[0] + else: + address1 = epc["address1"] # We attempt with the backup add - searcher = cls(address=epc["address1"], postcode=epc["postcode"]) + searcher = cls(address=address1, 
postcode=epc["postcode"]) find_epc_data = searcher.retrieve_newest_find_my_epc_data() non_invasive_recommendations = { diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 96eb5d0e..0ef37add 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -194,7 +194,7 @@ class Costs: IWI_CONTINGENCY = 0.2 # For air source heat pumps, we inflate the assume cost by quite a bit to account for design and installation - ASHP_CONTINGENCY = 0.35 + ASHP_CONTINGENCY = 0.25 # Where there is more uncertainty, a higher contingency rate is used HIGH_RISK_CONTINGENCY = 0.2 # When there is less uncertainty, a lower contingency rate is used @@ -871,10 +871,10 @@ class Costs: if needs_cylinder: # 1000 is the cost of a new hot water cylinder - total_cost = 1200 * number_heated_rooms + 1000 + total_cost = 1300 * number_heated_rooms + 1000 else: # 500 is the cost of a dual immersion heater - a rough estimate - total_cost = 1200 * number_heated_rooms + 500 + total_cost = 1300 * number_heated_rooms + 500 subtotal_before_vat = total_cost / (1 + self.VAT_RATE) vat = total_cost - subtotal_before_vat diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 18e1110b..9d1a094e 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -57,6 +57,31 @@ class HeatingRecommender: }, # These are the heating types we need to produce a dual heating recommendation "dual": None + }, + 'Electric underfloor heating, electric storage heaters': { + # For this, we would recommend a heat pump + "dual": None + }, + "Room heaters, electric, boiler and radiators, mains gas": { + "hhr": { + "mainheating_description": "Electric storage heaters, radiators", + "recommendation_description": "Install high heat retention electric storage heaters.", + "controls_prefix": "" + }, + "boiler": { + "mainheating_description": "Boiler and radiators, mains gas", + "recommendation_description": "Upgrade to a new condensing 
boiler.", + "controls_suffix": "" + }, + "dual": None + }, + "Room heaters, electric, electric storage heaters": { + "hhr": { + "mainheating_description": "Electric storage heaters, radiators", + "recommendation_description": "Install high heat retention electric storage heaters.", + "controls_prefix": "" + }, + "dual": None } } @@ -109,6 +134,10 @@ class HeatingRecommender: hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters + hhr_suitable = hhr_suitable and ( + "underfloor heating" not in self.property.main_heating["clean_description"] + ) + return ( hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and ("high_heat_retention_storage_heater" in measures) @@ -165,7 +194,8 @@ class HeatingRecommender: ) and (not ashp_only_heating_recommendation) and ("boiler_upgrade" in measures) and - (not self.has_ashp) + (not self.has_ashp) and + (not self.property.main_heating["has_warm_air"]) ) return is_valid, has_gas_boiler @@ -487,17 +517,30 @@ class HeatingRecommender: ] # This is a map from the heating controls description to the description of the air source heat pump set up - ashp_descriptions = { - "Time and temperature zone control": ( - f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, " - "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 or " - "24 hour tariff" - ), - "Programmer, TRVs and bypass": ( - f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure you " - "have an 18 or 24 hour tariff" - ), - } + if ashp_size is None: + ashp_descriptions = { + "Time and temperature zone control": ( + f"Install two cascaded air source heat pumps, and upgrade heating controls to Smart Thermostats, " + "room sensors and smart radiator valves (time & temperature zone control). 
Ensure you have an 18 " + "or " + "24 hour tariff" + ) + } + else: + + ashp_descriptions = { + "Time and temperature zone control": ( + f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, " + "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 " + "or " + "24 hour tariff" + ), + "Programmer, TRVs and bypass": ( + f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure " + f"you " + "have an 18 or 24 hour tariff" + ), + } new_heating_description = "Air source heat pump, radiators, electric" new_hot_water_description = "From main system" @@ -924,6 +967,7 @@ class HeatingRecommender: return recommendations self.heating_recommendations.extend(recommendations) + return None @staticmethod def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms): diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 0e73cffe..462d43aa 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -679,7 +679,7 @@ class Recommendations: # Handle the case of community schemes if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"): - if main_fuel_description == "mains gas (community)": + if main_fuel_description in ["mains gas (community)", "UNKNOWN"]: return { "heating_fuel_type": "Natural Gas (Community Scheme)", "hotwater_fuel_type": "Natural Gas (Community Scheme)", diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index fa8b831c..31ac2433 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -191,11 +191,22 @@ class RoofRecommendations: non_invasive_recommendations = self.property.non_invasive_recommendations + # We check a specific condition - which will imply loft insulation isn't appropriate but room in roof + # insulation 
is + # 1) We have an uninsulated loft (assumed) + # 2) We have a non-intrusive recommendation for room in roof insulation + + rir_over_loft = ( + self.property.roof["is_pitched"] and + self.property.roof["insulation_thickness"] == "none" and + "room_in_roof_insulation" in [x["type"] for x in non_invasive_recommendations] + ) + # We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or ( self.property.roof["is_pitched"] and "loft_insulation" in measures and not self.property.roof["is_at_rafters"] - ): + ) and not rir_over_loft: self.recommend_roof_insulation( u_value=u_value, insulation_thickness=self.insulation_thickness, @@ -223,7 +234,8 @@ class RoofRecommendations: # There are cases where the property might have a room roof as the second roof, but we have a recommendation for # it, so we allow this override if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or ( - "room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] + "room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] or + rir_over_loft ): self.recommend_room_roof_insulation(u_value, phase, default_u_values) return @@ -502,7 +514,7 @@ class RoofRecommendations: # and the cost of the materials rir_non_invasive_recommendation = next( - (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {} + (x for x in self.property.non_invasive_recommendations if x["type"] == "room_in_roof_insulation"), {} ) insulation_materials = pd.DataFrame(self.room_roof_insulation_materials) diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index a82e4df5..05113acf 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -10,11 +10,6 @@ class 
VentilationRecommendations(Definitions): crucial for prevent overheating risks in warmer months """ - VENTILATION_DESCRIPTIONS = [ - 'mechanical, extract only', - 'mechanical, supply and extract' - ] - def __init__( self, property_instance: Property, @@ -26,9 +21,6 @@ class VentilationRecommendations(Definitions): self.recommendation = None self.materials = [part for part in materials if part["type"] == "mechanical_ventilation"] - def identify_ventilation(self): - self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS - def recommend(self, phase): """ If there is no ventilation, we recommend installing ventilation @@ -38,8 +30,8 @@ class VentilationRecommendations(Definitions): :return: """ - self.identify_ventilation() - if self.has_ventilaion: + self.property.identify_ventilation() + if self.property.has_ventilation: return if len(self.materials) != 1: diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 05b9ec42..6909a3f0 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -47,19 +47,19 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation): # We bundle the impact of ventilation with the measure total = ( rec["total"] + ventilation_recommendation["total"] - if rec["type"] in assumptions.measures_needing_ventilation + if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation else rec["total"] ) gain = ( rec[goal_key] + ventilation_recommendation[goal_key] - if rec["type"] in assumptions.measures_needing_ventilation + if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation else rec[goal_key] ) rec_type = ( "+".join( [rec["type"], ventilation_recommendation["type"]] - ) if rec["type"] in assumptions.measures_needing_ventilation + ) if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation else 
"""
Build a sample asset list for exercising the end to end process, so that when Spring send us
data, we know it will work.
"""

import pandas as pd
from utils.s3 import read_csv_from_s3

birmingham_epcs = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv"
)

# Keep only the most recently lodged certificate per UPRN: sort newest first within each UPRN, then keep
# the first row of each group
birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE'])
birmingham_epcs = birmingham_epcs.sort_values(
    by=['UPRN', 'LODGEMENT_DATE'],
    ascending=[True, False]
)
birmingham_epcs = birmingham_epcs.drop_duplicates(subset='UPRN')

# The postal region is the part of the postcode before the first space
birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0]

# Both selections below are restricted to houses and bungalows
is_house_or_bungalow = birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])

# Addressable market: privately rented, EPC D or below, lodged from 2020 onwards
is_d_or_below = birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])
lodged_since_2020 = birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01'
is_private_rental = birmingham_epcs['TENURE'].isin(['rental (private)', 'Rented (private)'])
addressable_market = birmingham_epcs[
    is_d_or_below & lodged_since_2020 & is_house_or_bungalow & is_private_rental
]

# Load the Spring portfolio, drop its properties from the addressable market, and keep only the postal
# regions the portfolio covers
asset_list = pd.DataFrame(
    read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
)
asset_list["postal_region"] = asset_list["postcode"].str.split(" ").str[0]

already_in_portfolio = addressable_market["UPRN"].astype(int).astype(str).isin(asset_list["uprn"].values)
addressable_market = addressable_market[~already_in_portfolio]
in_portfolio_region = addressable_market["postal_region"].isin(asset_list["postal_region"].unique())
addressable_market = addressable_market[in_portfolio_region]

# The sample itself: houses/bungalows rated F or G, with an EPC lodged on or after 1 January 2025
is_f_or_g = birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])
lodged_since_2025 = birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01'
sample = birmingham_epcs[is_f_or_g & lodged_since_2025 & is_house_or_bungalow]

# Reduce the sample to the columns we would expect to receive from Spring: UPRN, address, postcode,
# property type and built form. Bedrooms, bathrooms and valuation are not in the EPC data, so they are
# simulated with fixed values
sample = sample[['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM']].copy()
sample = sample.assign(BEDROOMS=3, BATHROOMS=1, VALUATION=200000)
sample.columns = sample.columns.str.lower()

# Store this as an Excel file
sample.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx",
    index=False
)
properties[properties["property_type"] != "flat"] + +# Rename the key columns +properties = properties.rename( + columns={ + "address1": "address", + "number_of_bathrooms": "n_bathrooms", + "num_beds": "n_bedrooms" + } +) +properties["patch"] = True + +# Pull the non-invasive recommendations +asset_list_epc_client = AssetListEpcData( + asset_list=properties, + epc_auth_token=EPC_AUTH_TOKEN +) +asset_list_epc_client.get_data() +asset_list_epc_client.get_non_invasive_recommendations() +asset_list_epc_client.get_patch() + +extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data) +epc_df = pd.DataFrame(asset_list_epc_client.epc_data) + +# Find examples where patches are different to the api +compare_epc = [] +for patch in asset_list_epc_client.patches: + extracted = extracted_df[extracted_df["uprn"] == patch["uprn"]].squeeze() + epc = epc_df[epc_df["uprn"] == patch["uprn"]].squeeze() + compare_epc.append( + { + "uprn": extracted["uprn"], + "address": extracted["address"], + "postcode": extracted["postcode"], + "api_epc": int(extracted["current_epc_efficiency"]), + "fme_epc": int(epc["current-energy-efficiency"]), + } + ) +compare_epc = pd.DataFrame(compare_epc) +diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]] +# Compare matched addresses to make sure they are the same +compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge( + epc_df[["uprn", "address1", "postcode"]].rename(columns={"address1": "epc_address1", "postcode": "epc_postcode"}), + how="left", + on=["uprn"] +) + +# Add on uprn +properties = properties.merge( + extracted_df[["address", "postcode", "uprn"]], + how="left", + on=["address", "postcode"] +) + +# Store the asset list in s3 +filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" +save_csv_to_s3( + dataframe=properties, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename +) + +# Store non-invasive recommendations in S3 +non_invasive_recommendations_filename = 
def get_data(portfolio_id, scenario_ids):
    """
    Load the properties, plans and default recommendations used by the export.

    :param portfolio_id: only properties belonging to this portfolio are returned
    :param scenario_ids: only plans belonging to these scenarios, and the default recommendations attached
        to those plans, are returned
    :return: tuple ``(properties_data, plans_data, recommendations_data)``. Each element is a list of plain
        dicts keyed by column name; every recommendation dict additionally carries a "Scenario ID" entry
    """

    def columns_to_dict(model, instance):
        # Snapshot every table column of ``model`` from ``instance`` into a plain dict
        return {col.name: getattr(instance, col.name) for col in model.__table__.columns}

    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Properties joined to their EPC details, restricted to the requested portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id
        ).all()

        # Where both tables share a column name, the value from PropertyDetailsEpcModel wins because it is
        # unpacked last
        properties_data = [
            {
                **columns_to_dict(PropertyModel, prop.PropertyModel),
                **columns_to_dict(PropertyDetailsEpcModel, prop.PropertyDetailsEpcModel),
            }
            for prop in properties_query
        ]

        # Plans belonging to the requested scenarios
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
        plans_data = [columns_to_dict(Plan, plan) for plan in plans_query]

        # Plan IDs are used to reach the recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Default recommendations attached to those plans, together with the scenario of the owning plan
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # noqa: E712 - builds a SQL expression, not a Python comparison
        ).all()

        recommendations_data = []
        for rec in recommendations_query:
            # Rows normally expose the entity as ``rec.Recommendation``; fall back to the row itself otherwise
            entity = rec.Recommendation if hasattr(rec, 'Recommendation') else rec
            recommendations_data.append(
                {**columns_to_dict(Recommendation, entity), "Scenario ID": rec.scenario_id}
            )
    finally:
        # Always release the session, even when one of the queries raises
        session.close()

    return properties_data, plans_data, recommendations_data
def simulate_condition(asset_list, condition_costs):
    """
    Simulate the total condition cost of every property for each condition score from 1 to 10.

    This function is for testing, to see what the costing array looks like across all scores.

    :param asset_list: DataFrame with one row per property. Reads "uprn", "total_floor_area" and
        the bathroom count, taken from "n_bathrooms" when present and otherwise from "bathrooms".
        Floor area and bathroom count are coerced with float(), so numeric strings are accepted.
    :param condition_costs: DataFrame with a "Condition" column, a "Bathroom" column and any other
        cost columns. Every column except "Condition" is multiplied by the floor area; "Bathroom"
        is additionally multiplied by the bathroom count.
    :return: asset_list left-merged on "uprn" with the new columns "Condition 10" ... "Condition 1".
        NOTE(review): because this is a merge, duplicate "uprn" values multiply rows.
    :raises IndexError: if condition_costs has no row for one of the conditions 1-10.
    :raises KeyError: if a required column is missing from either DataFrame.
    """
    # Highest score first, which fixes the order of the output columns
    conditions = list(reversed(range(1, 11)))
    labels = ["Condition " + str(condition) for condition in conditions]

    # The rate row for a condition does not depend on the property, so look each one up once
    rates_by_condition = {
        condition: condition_costs[
            condition_costs["Condition"] == condition
        ].drop(columns=["Condition"]).iloc[0]
        for condition in conditions
    }

    bathrooms_column = "n_bathrooms" if "n_bathrooms" in asset_list.columns else "bathrooms"

    records = []
    for uprn, floor_area, n_bathrooms in zip(
        asset_list["uprn"], asset_list["total_floor_area"], asset_list[bathrooms_column]
    ):
        floor_area = float(floor_area)
        n_bathrooms = float(n_bathrooms)

        record = {"uprn": uprn}
        for condition, label in zip(conditions, labels):
            # Each cost is scaled by floor area; the multiplication returns a new Series, so the
            # cached rates are never modified
            condition_cost = rates_by_condition[condition] * floor_area
            condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
            record[label] = condition_cost.sum()
        records.append(record)

    # Passing the columns explicitly keeps "uprn" available for the merge when there are no rows
    condition_df = pd.DataFrame(records, columns=["uprn", *labels])

    return asset_list.merge(
        condition_df,
        how="left",
        on="uprn"
    )