diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index ad3087c3..21376708 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -301,6 +301,14 @@ class AssetList: "Potential unsafe environment", "Date of Inspection", "Borescoped?" ] + # Another version of non-intrusives: + NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [ + 'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?', + 'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED', + 'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE', + 'NAME OF SURVEYOR' + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -442,6 +450,8 @@ class AssetList: self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns # We detect if we have the old format of non-intruvies self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns + if self.old_format_non_intrusives_present: + self.non_intrusives_present = False self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns @@ -449,6 +459,8 @@ class AssetList: "Has the property been re-walled?" 
in self.raw_asset_list.columns ) + self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -750,7 +762,7 @@ class AssetList: self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} non_intrusive_columns = [] - if self.non_intrusives_present: + if self.non_intrusives_present and not self.new_format_non_insturives_present_v2: non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES if self.non_intrusives_eligibility: @@ -759,6 +771,9 @@ class AssetList: if self.new_format_non_insturives_present: non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES + if self.new_format_non_insturives_present_v2: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -827,52 +842,44 @@ class AssetList: # We attempt to convert the year built to a datetime, by detecting the format and converting def extract_year(date_str): - """ - Extracts the year from a date string in the format '01-Jul-YYYY'. - Returns the extracted year as an integer or None if the format is incorrect. - """ - known_errors = [ + known_errors = { "#MULTIVALUE", + "ND", + "PIMSS EMPTY", + "UNKNOWN", "This cell has an external reference that can't be shown or edited. 
Editing this cell will " "remove the external reference.", - "ND", - 'PIMSS EMPTY', - "UNKNOWN" - ] + 0 + } - if pd.isnull(date_str) or date_str in known_errors or (date_str == 0): + if pd.isnull(date_str) or date_str in known_errors: return None - if isinstance(date_str, str): - match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str) - if match: - return int(match.group(1)) # Extract the year and convert to integer - if "-" in date_str: - - # Count the number of times we have "-", as we've seen double ranges - # (when we have extensions) so the format is like this: - # 'G: 1983-1990, H: 1991-1995' - if date_str.count("-") == 2: - # We have a range - return int(date_str.split("-")[1].split(",")[0]) - # We probably have a range - return int(date_str.split("-")[1].strip()) - + # Handle datetime if isinstance(date_str, datetime): return date_str.year - if isinstance(date_str, float): - if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4): + # Handle numeric year (float or int) + if isinstance(date_str, (int, float)): + if 1000 <= int(date_str) <= 2100: return int(date_str) - # Check if date_str is a year itself - if str(date_str).isdigit() & (len(str(date_str)) == 4): - return int(date_str) + # Now handle string-based logic + if isinstance(date_str, str): + # Direct date match e.g. 
01-Jul-2021 + match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str) + if match: + return int(match.group(1)) - # Remove any non-numeric characters - date_str = re.sub(r"\D", "", str(date_str)) - if str(date_str).isdigit() & (len(str(date_str)) == 4): - return int(date_str) + # Find all 4-digit years in string + years = [int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str)] + if years: + return max(years) # Return most recent year + + # If only numbers are present without format + numeric_str = re.sub(r"\D", "", date_str) + if len(numeric_str) == 4 and numeric_str.isdigit(): + return int(numeric_str) raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me") @@ -1104,7 +1111,7 @@ class AssetList: num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS], floor_height=( float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if - x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5 + not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) else 2.5 ), perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER], built_form=x[self.EPC_API_DATA_NAMES["built-form"]] @@ -1315,10 +1322,16 @@ class AssetList: # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs - if self.non_intrusives_present: - existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" - ) + if self.non_intrusives_present and not self.old_format_non_intrusives_present: + + if self.new_format_non_insturives_present_v2: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV" + ) + else: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" + ) elif self.old_format_non_intrusives_present: existing_solar_non_intrusives_check = ( self.standardised_asset_list["non-intrusives: WFT 
Findings"].str.lower().str.strip().isin( @@ -1557,7 +1570,7 @@ class AssetList: ) & ( ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( ["district heating", "communal heating", "communal gas boiler"] - ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ") + ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ") ) ) @@ -1596,12 +1609,17 @@ class AssetList: # With this in mind, we look for 2 clases # 1) The property is fully insulated apart from the loft (<200mm insulation) # 2) THe property is fully insulated - - print("Should we include cavity properties where they might be uninsulated?") self.standardised_asset_list["solar_landlord_walls_insulated"] = ( self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( [ - "filled cavity", "insulated solid brick", "insulated timber frame", + "filled cavity", + "insulated solid brick", + "insulated timber frame", + "uninsulated cavity", + "insulated system built", + "insulated granite or whinstone", + "insulated sandstone or limestone", + "new build - average thermal transmittance" ] ) ) @@ -1999,24 +2017,6 @@ class AssetList: self.standardised_asset_list[col] ) - blocks_of_flats = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ] - - non_blocks_of_flats = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] - - # Produce some aggregate figures - self.work_type_figures = { - **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(), - **{ - k + " (Block of flats)": v for k, v in - blocks_of_flats["solar_reason"].value_counts().to_dict().items() - }, - **self.standardised_asset_list["solar_reason"].value_counts().to_dict() - } - # We prepare outcomes for output if self.outcomes is not None: logger.info("Preparing outcomes for output") @@ -2047,6 +2047,26 @@ class AssetList: ) ) + def 
get_work_figures(self): + blocks_of_flats = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + ] + + non_blocks_of_flats = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" + ] + + # Produce some aggregate figures + self.work_type_figures = { + **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(), + **{ + k + " (Block of flats)": v for k, v in + blocks_of_flats["solar_reason"].value_counts().to_dict().items() + }, + **self.standardised_asset_list["solar_reason"].value_counts().to_dict() + } + pprint(self.work_type_figures) + def fill_landlord_block_reference(self, has_blocks_of_flats): if not has_blocks_of_flats: return @@ -2082,15 +2102,40 @@ class AssetList: for _, row in blocks.iterrows(): addr = str(row[self.STANDARD_ADDRESS_1]) + full_addr = row[self.STANDARD_FULL_ADDRESS] + + # We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just + # the odds, evens or all of the numbers + has_odd = ( + "(odd)" in addr.lower() or + "(odd)" in full_addr.lower() or + "(odds)" in addr.lower() or + "(odds)" in full_addr.lower() + ) + has_even = ( + "(even)" in addr.lower() or + "(even)" in full_addr.lower() or + "(evens)" in addr.lower() or + "(evens)" in full_addr.lower() + ) # 1 ─ Range (e.g. 
1-7) m_range = RANGE_RE.search(addr) if m_range: + start, end = m_range.groups() start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) if start > end or (end - start) > 100: raise ValueError(f"Suspicious range '{addr}'") - for n in range(start, end + 1): + + # We define the looping range on whether we have odd, even or all numbers + house_number_range = range(start, end + 1) + if has_odd: + house_number_range = [x for x in house_number_range if x % 2 != 0] + if has_even: + house_number_range = [x for x in house_number_range if x % 2 == 0] + + for n in house_number_range: new = row.copy() new_addr = RANGE_RE.sub(str(n), addr, count=1) original_full_address = new[self.STANDARD_FULL_ADDRESS] @@ -2108,9 +2153,9 @@ class AssetList: expanded_rows.append(new) continue - # 2 ─ Explicit list (e.g. 1, 2, 5 Block) + # 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block) nums = NUM_RE.findall(addr) - if len(nums) > 1 and ',' in addr: + if len(nums) > 1 and (',' in addr or '&' in addr): for n in nums: new = row.copy() new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only @@ -2320,7 +2365,7 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = np.where( self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), self.standardised_asset_list["cavity_reason"] - + " " + "(Flat in block with more than 50% eligible, but not eligible itself)", + + " " + "(Flat in block with more than 50% eligible)", self.standardised_asset_list["cavity_reason"] ) @@ -2375,6 +2420,11 @@ class AssetList: none_details = [x for x in details_colnames if x is None] details_colnames = [x for x in details_colnames if x is not None] + if local_filepath is None: + # Create an empty DataFrame based on the fields in self.contact_detail_fields + self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys())) + return + contact_details = pd.read_excel( local_filepath, 
sheet_name=sheet_name )[[self.contact_detail_fields["landlord_property_id"]] + details_colnames] @@ -2486,10 +2536,14 @@ class AssetList: if reconcile_programme: programme_data = programme_data[~pd.isnull(programme_data["project_code"])] else: + + if programme_data["hubspot_status"].nunique() > 1: + logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?") + ready_to_be_scheduled = ( ( programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) & (~pd.isnull(programme_data["survey_date"])) + ) ) # completed_works = ( # (programme_data["hubspot_status"] != @@ -2540,13 +2594,13 @@ class AssetList: ) else: # We shouldn't have any missing products - programme_data = programme_data[ - ~pd.isnull(programme_data["survey_date"]) - ] + # programme_data = programme_data[ + # ~pd.isnull(programme_data["survey_date"]) + # ] if pd.isnull(programme_data["domna_product"]).sum(): raise ValueError("Missing products") - programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) + programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] @@ -2587,6 +2641,13 @@ class AssetList: programme_data[self.EPC_API_DATA_NAMES["uprn"]] ) + # Remove any negative URPSN which are not valid + programme_data[uprn_column] = np.where( + programme_data["estimated"].isin([1, True]), + None, + programme_data[uprn_column] + ) + # Add in some columns if we have them date_of_inspections = ( "Non-Intrusives: Date of Inspection" if @@ -2753,6 +2814,7 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) + programme_data['Postcode '] = programme_data['Postcode '].copy() programme_data['Installer '] = installer_name programme_data['Name '] = ( programme_data['Full Address '] + " ," + programme_data['Postcode '] @@ -2951,7 +3013,7 @@ class AssetList: 
outcomes["row_id"] = outcomes.index if outcomes_houseno[idx] is None: - outcomes_houseno = "houseno" + outcomes_houseno[idx] = "houseno" outcomes["houseno"] = outcomes[outcomes_address[idx]].apply( lambda x: SearchEpc.get_house_number(x, outcomes[outcomes_postcode]) ) @@ -3219,12 +3281,21 @@ class AssetList: install_col = "INSTALL / CANCELLATION DATE" elif 'INSTALL/ CANCELLATION DATE' in master_data.columns: install_col = 'INSTALL/ CANCELLATION DATE' + elif "INSTALL/CANCELLATION DATE" in master_data.columns: + install_col = "INSTALL/CANCELLATION DATE" + elif 'Measure 1 Install Date' in master_data.columns: + install_col = 'Measure 1 Install Date' else: raise ValueError("No install or cancellation date") - submission_col = ( - "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" - ) + if "SUBMISSION DATE" in master_data.columns: + submission_col = "SUBMISSION DATE" + elif "SUBMISSION DATE TO INSTALLERS" in master_data.columns: + submission_col = "SUBMISSION DATE TO INSTALLERS" + elif "Submission Date" in master_data.columns: + submission_col = "Submission Date" + else: + raise ValueError("No submission date column found in master data") master_data["row_id"] = master_data.index @@ -3239,6 +3310,10 @@ class AssetList: scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" elif "AFFORDABLE WARMTH" in master_data.columns: scheme_col = "AFFORDABLE WARMTH" + elif "Scheme" in master_data.columns: + scheme_col = "Scheme" + elif "Affordable Warmth" in master_data.columns: + scheme_col = "Affordable Warmth" else: scheme_col = "OFFICE USE ONLY" @@ -3254,12 +3329,30 @@ class AssetList: property_type_col = "PROPERTY TYPE As per table emailed" elif "PROPERTY TYPE As per table emailed" in master_data.columns: property_type_col = "PROPERTY TYPE As per table emailed" + elif "PROPERTY TYPE" in master_data.columns: + property_type_col = "PROPERTY TYPE" else: property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 
3W_Flat_1 (As per Matrix)" + if "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" in master_data.columns: + installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" + elif "INSTALLERS NOTES" in master_data.columns: + installer_notes_col = "INSTALLERS NOTES" + elif 'Installers Notes' in master_data.columns: + installer_notes_col = 'Installers Notes' + elif 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' in master_data.columns: + installer_notes_col = 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' + else: + raise ValueError("No installer notes column found in master data") + + if "INSTALLER" in master_data.columns: + installer_col = "INSTALLER" + elif "Installer" in master_data.columns: + installer_col = "Installer" + else: + raise ValueError("No installer column found in master data") + measure_mix_col = "MEASURE COMBO" - installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" - installer_col = "INSTALLER" town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' logger.info("Matching master data to asset list") @@ -3301,6 +3394,10 @@ class AssetList: ] house_no = row[house_no_col] + + if pd.isnull(house_no): + house_no = None + if isinstance(house_no, (float, int)): house_no = str(int(house_no)) @@ -3401,6 +3498,9 @@ class AssetList: master_data[measure_mix_col] = "Measure mix not recorded" matched = pd.DataFrame(matched) + if matched.empty: + continue + master_to_append = master_data[ [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] ].merge( diff --git a/asset_list/abs_estimates.py b/asset_list/abs_estimates.py new file mode 100644 index 00000000..58adcca6 --- /dev/null +++ b/asset_list/abs_estimates.py @@ -0,0 +1,229 @@ +""" +Simple script to take a standardised asset list and calculate the abs. 
We'll use this code to estimate +the ABS for properties, going forward +""" +import os +import pandas as pd +import numpy as np +from dotenv import load_dotenv +from etl.find_my_epc.AssetListEpcData import AssetListEpcData +from backend.Funding import Funding +from backend.app.utils import sap_to_epc + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx", + sheet_name="Cavity Route (Insta Review)" +) + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) +pps_matrix = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx", + header=1 +) +pps_matrix.columns = [c.strip() for c in pps_matrix.columns] + +# We need to estimate the number of points the work will produce and the finishing band. For this, we assume 7 for +# cavity and 15 for solar. We'll be more specific in the future, but for now, this is a good enough estimate. 
+route = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "upr"} +) +route["address"] = route["address"].astype(str) + +asset_list_epc_client = AssetListEpcData( + asset_list=route, + epc_auth_token=EPC_AUTH_TOKEN +) + +asset_list_epc_client.get_data() +asset_list_epc_client.get_non_invasive_recommendations() + +solar_sap_points = [] +for r in asset_list_epc_client.non_invasive_recommendations: + if not r.get("recommendations"): + continue + solar_recommendations = [ + x for x in r["recommendations"] if "solar_pv" in x["type"] + ] + if solar_recommendations: + solar_recommendations = solar_recommendations[0] + else: + continue + + address = r["address"] + postcode = r["postcode"] + + solar_sap_points.append( + { + "address": address, + "postcode": postcode, + "sap_points": solar_recommendations["sap_points"] + } + ) + +solar_sap_points = pd.DataFrame(solar_sap_points) +solar_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True) +# Store the sap points in the cavity route to csv +# cwi_sap_points.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv", +# index=False +# ) + +avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index() +avg_solar_points = solar_sap_points["sap_points"].median() +asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str) +asset_list = asset_list.merge( + solar_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"] +).drop( + columns=["address", "postcode"] +) + +# Fill the sap points with the average cwi points +asset_list = asset_list.merge( + avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"}), + how="left", on=["domna_postcode"], suffixes=("", "_avg") +) +asset_list["sap_points"] = 
asset_list["sap_points"].fillna(asset_list["sap_points_avg"]) +asset_list.drop(columns=["sap_points_avg"], inplace=True) + +asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_solar_points) +asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"] +asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x)) +asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x)) + +asset_list["ending_half_band"] = np.where( + (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]), + "Low_C", + asset_list["ending_half_band"] +) +# Realistically, we'll take the properties to a low C at worst +asset_list["ending_half_band"] = np.where( + (asset_list["post_works_sap"] < 69), + "Low_C", + asset_list["ending_half_band"] +) + +asset_list = asset_list.merge( + abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"], + right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ] +) +asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment']) + +asset_list = asset_list.rename( + columns={"Cost Savings": "funding_abs"} +) + +print(asset_list["domna_property_id"].duplicated().sum()) + +# Store this data +asset_list.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv", + index=False +) + +# Cavity process! 
+# cwi_sap_points = [] +# for r in asset_list_epc_client.non_invasive_recommendations: +# if not r.get("recommendations"): +# continue +# cwi_recommendations = [ +# x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"] +# ] +# if cwi_recommendations: +# cwi_recommendations = cwi_recommendations[0] +# else: +# continue +# +# address = r["address"] +# postcode = r["postcode"] +# +# cwi_sap_points.append( +# { +# "address": address, +# "postcode": postcode, +# "sap_points": cwi_recommendations["sap_points"] +# } +# ) +# +# cwi_sap_points = pd.DataFrame(cwi_sap_points) +# cwi_sap_points = pd.read_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv" +# ) +# cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True) +avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index() +avg_cwi_points = cwi_sap_points["sap_points"].median() +asset_list = asset_list.merge( + cwi_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"] +).drop( + columns=["address", "postcode"] +) + +# Fill the sap points with the average cwi points +asset_list = asset_list.merge( + avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}), + how="left", on=["domna_postcode"], suffixes=("", "_avg") +) +asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"]) +asset_list.drop(columns=["sap_points_avg"], inplace=True) + +asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_cwi_points) +asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"] +asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x)) +asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: 
Funding.get_sap_band(x)) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x)) + +asset_list["funding_scheme"] = np.where( + ( + (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]) + ), + "GBIS", + "ECO4" +) +asset_list = asset_list.merge( + abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"], + right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ] +) +asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment']) + +# Using CWI solid 1.7 -> 0.3 rates +cwi_pps_matrix = pps_matrix[ + pps_matrix["Measure_Type"].isin(["CWI_0.033"]) +] +# Merge on +asset_list = asset_list.merge( + cwi_pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename( + columns={ + "Cost Savings": "partial_project_score", + "Starting Band": "starting_half_band", + "Total Floor Area Band": "floor_area_band" + } + ), + how="left", + on=["starting_half_band", "floor_area_band"], +) +asset_list["partial_project_score"] = np.where( + (asset_list["epc_sap_score_on_register"] > 69), + None, + asset_list["partial_project_score"] +) + +asset_list["funding_abs"] = np.where( + asset_list["funding_scheme"] == "GBIS", + asset_list["partial_project_score"], + asset_list["Cost Savings"] +) + +asset_list["domna_property_id"].duplicated().sum() + +# Store this data +asset_list.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv", + index=False +) diff --git a/asset_list/app.py b/asset_list/app.py index 7c0023ce..e431f723 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -1,7 +1,6 @@ import os import json import pandas as pd -from pprint import pprint from asset_list.AssetList import AssetList from asset_list.mappings.property_type import PROPERTY_MAPPING from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS @@ -60,39 +59,370 @@ def app(): Property UPRN """ 
- # NCHA - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" - data_filename = "Energy Information MASTER June 2025.xlsx" - sheet_name = "Data" + # CDS + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS" + data_filename = "Founder Estates - Asset List.xlsx" + sheet_name = "Combined" postcode_column = 'Postcode' fulladdress_column = "Address" address1_column = None address1_method = "house_number_extraction" address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Build Date (HAR10)" + landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "Property Type (HAR10)" - landlord_built_form = "Build Form (EPC)" - landlord_wall_construction = "Wall Description" - landlord_roof_construction = None - landlord_heating_system = "HEAT Code" + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_heating_system = "Heating Type" landlord_existing_pv = None - landlord_property_id = "Place ref" - landlord_sap = "EPC SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None + landlord_property_id = "Row ID" + outcomes_filename = [] + outcomes_sheetname = [] + outcomes_postcode = [] + outcomes_houseno = [] + outcomes_address = [] + outcomes_id = [] master_filepaths = [] master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None master_id_colnames = [] + landlord_roof_construction = None + phase = False + landlord_sap = None + ecosurv_landlords = None + + # Plus Dane + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/" + data_filename = "20250711 Plus Dane Asset List.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + fulladdress_column = "Address" + address1_column = None + address1_method = 
"house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Property Age" + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Built Form" + landlord_wall_construction = "Wall Construction" + landlord_heating_system = "Full Heating System" + landlord_existing_pv = None + landlord_property_id = "UPRN" + outcomes_filename = [ + os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"), + os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"), + os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"), + ] + outcomes_sheetname = [ + "CWI & LI - 2024", "2025 - CWI", "PV - 2025", + ] + outcomes_postcode = ["Postcode", "Postcode", "Postcode"] + outcomes_houseno = ["No.", "No", "No"] + outcomes_address = ["Address", "Address", "Address"] + outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"] + master_filepaths = [ + os.path.join(data_folder, "submissions/JJC-Table 1.csv"), + os.path.join(data_folder, "submissions/SCIS-Table 1.csv") + ] + master_to_asset_list_filepath = None + asset_list_header = 1 + landlord_block_reference = None + master_id_colnames = [None, None] + landlord_roof_construction = None + phase = False + landlord_sap = "SAP Rating" + ecosurv_landlords = "plus dane" + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" + # data_filename = "20250710 Asset List Brentwood.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "House Number" + # address1_method = None + # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] + # missing_postcodes_method = None + # landlord_year_built = "Year Built" + # landlord_os_uprn = None + # landlord_property_type = "Dwelling" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating" + # 
landlord_existing_pv = None + # landlord_property_id = "UPRN" + # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] + # outcomes_sheetname = ["OUTCOMES"] + # outcomes_postcode = ["POSTCODE"] + # outcomes_houseno = [None] + # outcomes_address = ["ADDRESS"] + # outcomes_id = [None] + # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] + # master_to_asset_list_filepath = None + # asset_list_header = 1 + # landlord_block_reference = None + # master_id_colnames = [None] + # landlord_roof_construction = None + # phase = False + # landlord_sap = None + # ecosurv_landlords = "brentwood" + + # Brentwood + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" + # data_filename = "20250710 Asset List Brentwood.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "House Number" + # address1_method = None + # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] + # missing_postcodes_method = None + # landlord_year_built = "Year Built" + # landlord_os_uprn = None + # landlord_property_type = "Dwelling" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating" + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] + # outcomes_sheetname = ["OUTCOMES"] + # outcomes_postcode = ["POSTCODE"] + # outcomes_houseno = [None] + # outcomes_address = ["ADDRESS"] + # outcomes_id = [None] + # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] + # master_to_asset_list_filepath = None + # asset_list_header = 1 + # landlord_block_reference = None + # master_id_colnames = [None] + # landlord_roof_construction = None + # phase = False + # landlord_sap = None + # ecosurv_landlords = "brentwood" + # + # # Eastlight + # 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme" + # data_filename = "INSPECTIONS MASTER Non Tech.xlsx" + # sheet_name = "EASTLIGHT CW" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "HouseName" + # address1_method = None + # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"] + # missing_postcodes_method = None + # landlord_year_built = "Built In Year" + # landlord_os_uprn = None + # landlord_property_type = "AssetType" + # landlord_built_form = "Archetype" # Using inspections archetype + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = "Main Heating Source" + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # landlord_sap = "SAP Score" + # outcomes_filename = [ + # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"), + # os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"), + # ] + # outcomes_sheetname = ["Outcomes", "Feedback"] + # outcomes_postcode = ["Postcode", "Postcode"] + # outcomes_houseno = ["No", "No."] + # outcomes_id = [None, None] + # outcomes_address = ["Address", "Address"] + # master_filepaths = [ + # os.path.join(data_folder, "ECO 3-Table 1.csv"), + # os.path.join(data_folder, "ECO 4-Table 1.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "eastlight" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None] + # landlord_sap = None + + # Pickering and Ferens + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" + # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" + # sheet_name = "Sava Intelligent Energy - Prope" + # postcode_column = 'Postcode' + # fulladdress_column = 'Address' + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + 
# missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = "Property Type" # Using the inspections property type + # landlord_built_form = "Archetype 2" + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # landlord_sap = "SAP Rating (RdSAP 10)" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [ + # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), + # os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "pickering" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None] + + # Colchester + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" + # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Full Address.1' + # fulladdress_column = "Full Address" + # address1_column = None + # address1_method = "first_word" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Property Type" + # landlord_wall_construction = "Wallinsul" + # landlord_heating_system = "HeatSorc" + # landlord_existing_pv = None + # landlord_property_id = "Property Reference" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # asset_list_header = 0 + # landlord_built_form = 
None + # landlord_roof_construction = None + # landlord_sap = None + # landlord_block_reference = None + # phase = False + # ecosurv_landlords = None + # master_id_colnames = [] + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" + # data_filename = "EalingFlats.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None # Using the inspections property type + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "Property ref" + # landlord_sap = None + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = "Block Ref" + # master_id_colnames = [] + + # Southern - Jan list + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" + # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" + # sheet_name = "Jan 2025 additions" + # postcode_column = 'Post Code' + # fulladdress_column = None + # address1_column = "NO." 
+ # address1_method = None + # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None # Using the inspections property type + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "SH Property Reference" + # landlord_sap = None + # outcomes_filename = [ + # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), + # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), + # ] + # outcomes_sheetname = ["Feedback", "Collated"] + # outcomes_postcode = ["Poscode", "Postcode"] + # outcomes_houseno = ["No.", "No"] + # outcomes_id = ["UPRNs", None] + # outcomes_address = ["Address", "Address"] + # master_filepaths = [ + # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "southern" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None, None, None] + + # NCHA + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" + # data_filename = "Energy Information MASTER June 2025.xlsx" + # sheet_name = "Data" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Date (HAR10)" + # landlord_os_uprn = None + # landlord_property_type = "Property Type (HAR10)" + # 
landlord_built_form = "Build Form (EPC)" + # landlord_wall_construction = "Wall Description" + # landlord_roof_construction = None + # landlord_heating_system = "HEAT Code" + # landlord_existing_pv = None + # landlord_property_id = "Place ref" + # landlord_sap = "EPC SAP" + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None + # master_filepaths = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [] # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" # data_filename = "07.04 CALICO - Final List.xlsx" @@ -593,6 +923,7 @@ def app(): # We now flag the status of the property asset_list.label_property_status() asset_list.analyse_geographies() + asset_list.get_work_figures() # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py index 5110fb5f..23ff900a 100644 --- a/asset_list/hubspot/config.py +++ b/asset_list/hubspot/config.py @@ -17,7 +17,7 @@ class HubspotProcessStatus(IntEnum): # The property didn't get access and needs sign off SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF" # The survey has been completed. We don't have any update as to whether the property has been installed - SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF" + SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF" # The property turned out to be ineligibile NOT_VIABLE = 4, "NOT VIABLE" # The property is with the installer. 
This will likely be the default for historic programmes @@ -79,7 +79,7 @@ CRM_UPLOAD_COLUMNS = [ 'Last EPC: Room Height ', 'Last EPC: Age Band ', 'Deal Stage ', 'Pipeline ', 'Expected Commencement Date ', - 'Deal Name ', 'Project Code ', + 'Deal Name ', 'Project Code ', 'Postcode ', 'Product ID ', 'Name ', 'Unit price ', 'Quantity ', 'Deal Owner', 'Amount ', 'Installer ' ] diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index eed6d7e7..b12f4c04 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -2,6 +2,32 @@ import os import pandas as pd from asset_list.AssetList import AssetList +import re + + +def normalize_uk_phone(number: str | float | int) -> str | None: + if pd.isna(number): + return None + + number = str(number) + number = re.sub(r"[^\d+]", "", number) + + # Handle common short inputs: add '0' if likely missing + if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number): + number = "0" + number + + # Convert to international format + if number.startswith("0"): + number = "+44" + number[1:] + elif number.startswith("0044"): + number = "+" + number[2:] + + # Must be +44 followed by 10 digits (some area codes may vary) + if re.match(r"^\+44\d{9,10}$", number): + return number + + return None + def app(): """ @@ -18,32 +44,28 @@ def app(): """ # inputs: - reconcile_programme = False # If True, the hubspot upload will include all properties with a project code - customer_domain = "https://sandwell.gov.uk" - installer_name = "J & J CRUMP" + reconcile_programme = True # If True, the hubspot upload will include all properties with a project code + customer_domain = "https://ealing.gov.uk" + installer_name = "SCIS" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - " - "Standardised.xlsx" + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared " + "programme.xlsx" ) - asset_list_sheet_name = "Proposed Program" - asset_list_header = 1 + asset_list_sheet_name = "Standardised Asset List" + asset_list_header = 0 - contact_details_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx" - ) - contacts_sheet_name = "Sheet1" - contacts_landlord_property_id = "landlord_property_id" + contact_details_filepath = None + contacts_sheet_name = "Sheet 1" + contacts_landlord_property_id = "UPRN" contacts_phone_number_column = "phone_number" contacts_secondary_phone_number_column = "secondary_phone_number" contacts_secondary_contact_full_name = "secondary_contact_full_name" contacts_email_column = "email" contacts_fullname_column = "fullname" - contacts_firstname_column = "firstname" - contacts_lastname_column = "lastname" + contacts_firstname_column = "First Name" + contacts_lastname_column = "Last Name" - existing_programme_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv" - ) + existing_programme_filepath = None asset_list = AssetList.load_standardised_asset_list( asset_list_filepath, asset_list_sheet_name, asset_list_header @@ -68,12 +90,12 @@ def app(): ) # Remove the existing programme - existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") - asset_list.hubspot_data = asset_list.hubspot_data[ - ~asset_list.hubspot_data["Domna Property ID "].isin( - existing_programme['Domna Property ID'].values - ) - ] + # existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") + # asset_list.hubspot_data = asset_list.hubspot_data[ + # ~asset_list.hubspot_data["Domna Property ID "].isin( + # existing_programme['Domna Property ID'].values + # ) + # ] # Get the filepath and the filename. Append hubspot upload to the filename. 
We also change the file type to csv directory, filename = os.path.split(asset_list_filepath) @@ -89,3 +111,66 @@ def app(): # Just store locally asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig") + + # # TODO: Set this up separately, but we associate multiple contacts to the same deal + # contact_details = pd.read_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot " + # "Upload/Hubspot/contact " + # "details.csv" + # ) + # + # # contacts_phone_number_column = "phone_number" + # # contacts_secondary_phone_number_column = "secondary_phone_number" + # # contacts_secondary_contact_full_name = "secondary_contact_full_name" + # # contacts_email_column = "email" + # # contacts_fullname_column = "fullname" + # # contacts_firstname_column = "First Name" + # # contacts_lastname_column = "Last Name" + # contact_details["phone_number"] = contact_details["Mobile Phone"].copy() + # # If phone number is NaN, we will use the landline number + # contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"]) + # contact_details["secondary_phone_number"] = contact_details["Landline"].copy() + # # If secondary phone number is the same as primary, we remove it + # import numpy as np + # contact_details["secondary_phone_number"] = np.where( + # contact_details["secondary_phone_number"] == contact_details["phone_number"], + # np.nan, + # contact_details["secondary_phone_number"] + # ) + # contact_details = contact_details[ + # ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number", + # "secondary_phone_number", "First Name", "Last Name"]].copy().rename( + # columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"} + # ) + # contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"] + # # Format the phone numbers + # + # contact_details["phone_number"] = 
contact_details["phone_number"].astype(int).astype(str).apply( + # normalize_uk_phone) + # contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype( + # str).apply( + # normalize_uk_phone) + # + # # Add in the Hubspot deal data + # hubspot_data = pd.read_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/" + # "property-status.csv", + # encoding="utf-8-sig" + # ) + # # Merge on contact details + # contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge( + # contact_details, + # how="left", + # right_on="landlord_proprty_id", + # left_on="Landlord Property ID" + # ) + # + # contact_details = contact_details.drop(columns=["landlord_proprty_id"]) + # + # # Store as csv + # contact_details.to_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar " + # "Programme Hubspot Upload/Hubspot/" + # "contact_details.csv", + # index=False, encoding="utf-8-sig" + # ) diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 45e45c54..c17e0ed4 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -3,7 +3,7 @@ import numpy as np STANDARD_BUILT_FORMS = { "unknown", # Houses - "end-terrace", "semi-detached", "detached", "mid-terrace", + "end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace", # Flats "ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise", } @@ -358,6 +358,19 @@ BUILT_FORM_MAPPINGS = { '1983- 90 SEMI DET': 'semi-detached', '1983-90 MID TERR': 'mid-terrace', '1976-82 SEMI DET': 'semi-detached', - 'PRE 1900 MID TERR': 'mid-terrace' + 'PRE 1900 MID TERR': 'mid-terrace', + None: 'unknown', + 'SEMI-DETACHED': 'semi-detached', + 'DETACHED': 'detached', + 'MID TERRACE': 'mid-terrace', + 'END TERRACE': 'end-terrace', + 'ENCLOSED MID': 'enclosed mid-terrace', + + 'BUILDING': 
'unknown', + 'FLAT COMMUNAL FACILITIES': 'unknown', + 'MAISONETTE': 'unknown', + 'HOUSE': 'unknown', + 'FLAT': 'unknown', + 'BLOCK': 'unknown' } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 1a46c429..010d49a5 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -28,6 +28,7 @@ STANDARD_HEATING_SYSTEMS = { "electric underfloor", "no heating", "non-electric underfloor", + "warm air heating", } HEATING_MAPPINGS = { @@ -326,5 +327,42 @@ HEATING_MAPPINGS = { 'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler', 'GREENSTAR 30SI COMPACT': 'gas combi boiler', 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler', - 'Not applicable for this asset type': 'unknown' + 'Not applicable for this asset type': 'unknown', + 'Boiler: F rated Regular Boiler': 'gas condensing boiler', + 'Warm Air Systems: Electric warm air: Electricaire system': 'warm air heating', + 'Boiler: B rated Combi': 'gas condensing combi', + 'Boiler: G rated Regular Boiler': 'gas condensing boiler', + 'Electric Storage Systems: Modern (slimline) storage heaters': 'electric storage heaters', + 'Boiler: C rated CPSU': 'gas condensing combi', + 'Boiler: D rated Regular Boiler': 'gas condensing boiler', + 'Warm Air Systems: Gas fired warm air with balanced or open flue: Ducted or stub-ducted, on-off control, ' + 'pre 1998': 'warm air heating', + 'Electric Storage Systems: Integrated storage+direct-acting heater': 'electric storage heaters', + 'Boiler: D rated Combi': 'gas condensing combi', + 'Heat Pump: (from database)': 'air source heat pump', + 'Community Heating Systems: Community CHP and boilers (RdSAP)': 'communal heating', + '': 'unknown', + + 'Solid Fuel Boiler': 'solid fuel', + 'Heating (Other)': 'other', + 'Solid Fuel Fire Only': 'solid fuel', + 'No Main Heat Source': 'no heating', + 'Electric Programmable': 'electric storage heaters', + 'Linked to Communal Boiler': 'communal heating', + 'Bio Mass Boiler': 'solid 
fuel', + 'Electric Non Programmable': 'electric storage heaters', + + 'Room heaters, Mains gas': 'room heaters', + 'Boiler, Solid fuel': 'solid fuel', + 'Room heaters, Electricity': 'room heaters', + 'Room heaters, Solid fuel': 'room heaters', + 'Boiler, Oil': 'oil boiler', + 'Boiler, Biomass': 'boiler - other fuel', + 'Community heating, Community (non-gas)': 'communal heating', + 'Heat pump (wet), Electricity': 'air source heat pump', + 'Community heating, Community (mains gas)': 'communal gas boiler', + 'Boiler, Electricity': 'electric boiler', + 'Boiler, LPG': 'gas boiler, radiators', + 'Boiler, Mains gas': 'gas boiler, radiators', + 'Storage heating, Electricity': 'electric storage heaters' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index bdb6580e..caca0cf0 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -256,7 +256,6 @@ PROPERTY_MAPPING = { 'HOUSE (3 STOREY)': 'house', 'FLAT GROUND FLOOR': 'flat', 'FLAT TOP FLOOR': 'flat', - 'SHARED HOUSE': 'house', 'MAISONETTE': 'maisonette', 'DIRECT ACCESS HOSTEL': 'other', @@ -266,5 +265,11 @@ PROPERTY_MAPPING = { 'SHOP': 'other', 'Office Block': 'other', 'BLOCK (Non-Communal)': 'block of flats', - 'Refuge': 'other' + 'Refuge': 'other', + None: 'unknown', + 'HFOP FLAT': 'flat', + 'HFOP BEDSIT': 'bedsit', + 'LINKED FLAT': 'flat', + 'LINKED BUNGALOW': 'bungalow' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 13359ded..66860bec 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -9,6 +9,7 @@ STANDARD_ROOF_CONSTRUCTIONS = { "pitched less than 100mm insulation", "another dwelling above", "flat unknown insulation", + "flat insulated", "unknown insulated", "unknown", } @@ -51,5 +52,127 @@ ROOF_CONSTRUCTION_MAPPINGS = { '100MM': 'pitched less than 100mm insulation', 'U/K': 'unknown', 'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation', - 'U/K - 200MM RIR FLAT CEILING': 
'flat unknown insulation' + 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation', + + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 100mm': 'another dwelling above', + 'PitchedNormalNoLoftAccess: 150mm': 'pitched insulated', + 'PitchedNormalLoftAccess: As Built, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 200mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 200mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 50mm': 'unknown', + 'PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 150mm': 'unknown', 'Flat: None': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation', + 'PitchedNormalNoLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'Flat: Unknown, PitchedNormalLoftAccess: 200mm, SameDwellingAbove: Unknown': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation', + 'PitchedNormalNoLoftAccess: 250mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 300mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: 50mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalNoLoftAccess: 100mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched 
insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 150mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 200mm': 'pitched less than 100mm insulation', + 'PitchedNormalNoLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, SameDwellingAbove': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 100mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalNoLoftAccess: None': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 200mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 300mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalNoLoftAccess: 150mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 200mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: None': 'pitched less than 100mm insulation', + 'Flat: As Built': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 250mm': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 50mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched ' + 'insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated', + 'Flat: 50mm': 'flat unknown insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: None': 'another dwelling above', + 'PitchedNormalNoLoftAccess: None': 'pitched uninsulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 25mm': 'another dwelling above', + 
'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above', + 'Flat: As Built, PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + 'Flat: Unknown, PitchedNormalLoftAccess: 75mm, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 300mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalLoftAccess: 150mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 300mm': 'unknown', 'Flat: 100mm': 'flat unknown insulation', + 'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 12mm': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 150mm': 'another dwelling above', + 'PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation', + + 'PitchedNormalLoftAccess: 25mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None, PitchedNormalNoLoftAccess: Unknown': 'pitched ' + 'insulated', + 'PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'Flat: As Built, PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + + 
'PitchedNormalNoLoftAccess: Unknown, SameDwellingAbove: Unknown': 'pitched no access to loft', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: As Built': 'pitched less than 100mm insulation', + 'PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 50mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched insulated', + 'Flat: 100mm, Flat: As Built': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: No Insulation': 'another dwelling above', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 300mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 270mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 300mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated', + 'Flat: As Built, PitchedNormalNoLoftAccess: 250mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalNoLoftAccess: 50mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 75mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: No 
Insulation': 'pitched insulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 150mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above', + 'Flat: As Built, PitchedNormalNoLoftAccess: 200mm': 'flat unknown insulation', + 'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation', + 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: No Insulation': + 'another dwelling above', + 'Flat: As Built, PitchedNormalLoftAccess: 50mm': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 25mm': 'pitched less than 100mm insulation', + 'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation', + 'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown, PitchedThatched: 25mm': 'pitched insulated', + 'Flat: 150mm+': 'flat insulated', + 'Flat: Unknown, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated', + 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: 250mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 75mm': 'pitched insulated', + 'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated', + 
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 200mm': 'another dwelling above', + + 'PitchedNormalNoLoftAccess: Unknown': 'pitched no access to loft', + 'PitchedNormalLoftAccess: Unknown': 'pitched unknown insulation', + 'AnotherDwellingAbove: Unknown': 'another dwelling above' + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 2e0a332f..245b7f88 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -7,122 +7,163 @@ STANDARD_WALL_CONSTRUCTIONS = { "uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation", # Timber Frame "timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame", - "system built", "granite or whinstone", "other", - "unknown", "sandstone or limestone", + # System + "system built unknown insulation", "insulated system built", "uninsulated system built", + # Granite or Whinstone + "granite or whinstone unknown insulation", "insulated granite or whinstone", "uninsulated granite or whinstone", + # Sandstone or Limestone + "sandstone or limestone unknown insulation", "insulated sandstone or limestone", + "uninsulated sandstone or limestone", + # Other + "other", "cob", "new build - average thermal transmittance", } WALL_CONSTRUCTION_MAPPINGS = { "New Build - Average Thermal Transmittance": "new build - average thermal transmittance", - 'Average thermal transmittance 0.25 W/m?K': 'unknown', + 'Average thermal transmittance 0.25 W/m?K': 'new build - average thermal transmittance', 'Cavity wall, as built, insulated (assumed)': 'filled cavity', 'Average thermal transmittance 0.31 W/m?K': 'unknown', 'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity', - 'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.20 
W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown', - 'Average thermal transmittance 0.16 W/m?K': 'unknown', - 'Average thermal transmittance 0.27 W/m²K': 'unknown', - 'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.18 W/m?K': 'unknown', - 'Granite or whin, with internal insulation': 'granite or whinstone', - "Granite or whinstone, as built, insulated (assumed)": "granite or whinstone", - 'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown', - 'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown', - 'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown', - 'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown', - 'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone', - 'Average thermal transmittance 0.33 W/m?K': 'unknown', + 'Average thermal transmittance 0.30 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.28 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.25 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.21 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.20 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.29 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.16 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.27 W/m²K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.15 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.23 W/m-¦K': 'new build - 
average thermal transmittance', + 'Average thermal transmittance 0.18 W/m?K': 'new build - average thermal transmittance', + 'Granite or whin, with internal insulation': 'insulated granite or whinstone', + "Granite or whinstone, as built, insulated (assumed)": "uninsulated granite or whinstone", + 'Average thermal transmittance 0.22 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.24 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.16 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.35 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.26 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.62 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.64 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.61 W/m?K': 'new build - average thermal transmittance', + 'Sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', + 'Average thermal transmittance 0.33 W/m?K': 'new build - average thermal transmittance', 'Cavity wall,': "cavity unknown insulation", 'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity', - 'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown', - 'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown', - 'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown', - 'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown', - 'Cavity wall, with external insulation': 'filled cavity', 'Average thermal 
transmittance 0.21 W/m?K': 'unknown', - 'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown', - 'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown', + 'Average thermal transmittance 0.29 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.32 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.19 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.27 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.22 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.38 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.26 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.27 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.18 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance = 0.27 W/m?K': 'new build - average thermal transmittance', + 'Cavity wall, with external insulation': 'filled cavity', + 'Average thermal transmittance 0.21 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.23 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.20 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.32 W/m?K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.24 W/m-¦K': 'new build - average thermal transmittance', 'Cavity wall, with internal insulation': 'filled cavity', - 'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown', + 'Average thermal transmittance 0.17 W/m-¦K': 'new build - average thermal transmittance', + 'Average thermal transmittance 0.28 
W/m?K': 'new build - average thermal transmittance', 'new build - average thermal transmittance': 'new build - average thermal transmittance', - 'average thermal transmittance 0.25 w/m?k': 'unknown', + 'average thermal transmittance 0.25 w/m?k': 'new build - average thermal transmittance', 'cavity wall, as built, insulated (assumed)': 'filled cavity', - 'average thermal transmittance 0.31 w/m?k': 'unknown', + 'average thermal transmittance 0.31 w/m?k': 'new build - average thermal transmittance', 'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity', - 'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown', - 'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown', - 'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown', - 'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m²k': 'unknown', - 'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown', - 'average thermal transmittance 0.18 w/m?k': 'unknown', - 'granite or whin, with internal insulation': 'granite or whinstone', - 'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown', - 'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown', - 'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown', - 'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown', - 'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone', - 'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation", + 'average thermal transmittance 0.30 w/m?k': 'new build - average thermal 
transmittance', + 'average thermal transmittance 0.28 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.25 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.21 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.20 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.29 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.16 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.27 w/m²k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.15 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.23 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.18 w/m?k': 'new build - average thermal transmittance', + 'granite or whin, with internal insulation': 'insulated granite or whinstone', + 'average thermal transmittance 0.22 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.24 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.16 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.35 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.26 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.62 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.64 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.61 w/m?k': 'new build - average thermal transmittance', + 'sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', + 'average thermal transmittance 0.33 w/m?k': 'new build - average thermal transmittance', + 'cavity wall,': "cavity unknown insulation", 'cavity wall, as 
built, partial insulation (assumed)': 'partial insulated cavity', - 'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown', - 'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown', - 'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown', - 'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown', - 'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown', - 'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown', - 'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown', - 'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown', - 'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown', - 'average thermal transmittance 0.28 w/m?k': 'unknown', + 'average thermal transmittance 0.29 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.32 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.19 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.27 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.22 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.38 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.26 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.27 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.18 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal 
transmittance = 0.27 w/m?k': 'new build - average thermal transmittance', + 'cavity wall, with external insulation': 'filled cavity', + 'average thermal transmittance 0.21 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.23 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.20 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.32 w/m?k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.24 w/m-¦k': 'new build - average thermal transmittance', + 'cavity wall, with internal insulation': 'filled cavity', + 'average thermal transmittance 0.17 w/m-¦k': 'new build - average thermal transmittance', + 'average thermal transmittance 0.28 w/m?k': 'new build - average thermal transmittance', 'Cavity wall, filled cavity': 'filled cavity', 'Cavity wall, filled cavity and external insulation': 'filled cavity', - 'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone', + 'Granite or whinstone, as built, no insulation (assumed)': 'uninsulated granite or whinstone', 'Solid brick, as built, insulated (assumed)': 'insulated solid brick', 'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick', 'Solid brick, with external insulation': 'insulated solid brick', 'Solid brick, with internal insulation': 'insulated solid brick', - 'System built, as built, insulated (assumed)': 'system built', - 'System built, as built, no insulation (assumed)': 'system built', - 'System built, with external insulation': 'system built', - 'System built, with internal insulation': 'system built', - 'Timber frame, as built, insulated (assumed)': 'timber frame', - 'Timber frame, as built, no insulation (assumed)': 'timber frame', - 'Timber frame, as built, partial insulation (assumed)': 'timber frame', - 'Timber frame, with additional insulation': 'timber frame', + 'System built, as built, insulated (assumed)': 'insulated 
system built', + 'System built, as built, no insulation (assumed)': 'uninsulated system built', + 'System built, with external insulation': 'insulated system built', + 'System built, with internal insulation': 'insulated system built', + 'Timber frame, as built, insulated (assumed)': 'insulated timber frame', + 'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame', + 'Timber frame, as built, partial insulation (assumed)': 'insulated timber frame', + 'Timber frame, with additional insulation': 'insulated timber frame', 'CAVITY': 'cavity unknown insulation', 'COMB': 'unknown', 'NONE': 'unknown', 'NOTKNOWN': 'unknown', 'SOLID': 'solid brick unknown insulation', np.nan: 'unknown', - 'RENDER/TIMBER FRAME': 'timber frame', - 'SYSTEM BUILT': 'system built', + 'RENDER/TIMBER FRAME': 'timber frame unknown insulation', + 'SYSTEM BUILT': 'system built unknown insulation', 'PCC PANELS': 'other', 'NOT APPLICABLE - FLAT': 'unknown', - 'BRICK/TIMBER FRAME': 'timber frame', + 'BRICK/TIMBER FRAME': 'timber frame unknown insulation', 'BRICK/BLOCK CAVITY': 'cavity unknown insulation', - 'STONE SOLID': 'sandstone or limestone', - 'EXT CLADDING SYSTEM': 'system built', + 'STONE SOLID': 'sandstone or limestone unknown insulation', + 'EXT CLADDING SYSTEM': 'system built unknown insulation', 'BRICK/BLOCK SOLID': 'solid brick unknown insulation', - 'Cavity Filled cavity (with internal/external)': 'filled cavity', 'ND (inferred) Filled cavity': 'filled cavity', 'Cavity Filled cavity': 'filled cavity', 'Cavity Unknown insulation': 'cavity unknown insulation', - 'Timber frame As-built': 'timber frame', - 'System build Unknown insulation': 'system built', + 'Timber frame As-built': 'uninsulated timber frame', + 'System build Unknown insulation': 'system built unknown insulation', 'Cavity As-built': 'uninsulated cavity', - 'System build External': 'system built', + 'System build External': 'insulated system built', 'ND (inferred) ND (inferred)': 'unknown', 'Solid brick 
External': 'insulated solid brick', 'Cavity External': 'filled cavity', - 'System build As-built': 'system built', + 'System build As-built': 'uninsulated system built', 'Solid brick Internal': 'insulated solid brick', 'Cavity Internal': 'filled cavity', - 'System build Internal': 'system built', - 'Solid brick As-built': 'solid brick unknown insulation', - + 'System build Internal': 'insulated system built', + 'Solid brick As-built': 'uninsulated solid brick', 'Cavity ': 'cavity unknown insulation', 'Solid brick ': 'solid brick unknown insulation', 'Timber frame Timber frame (good insulation)': 'insulated timber frame', @@ -141,91 +182,90 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Cavity: Unknown': 'cavity unknown insulation', 'Cavity: AsBuilt (Post 1995)': 'filled cavity', 'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation', - 'SystemBuilt: AsBuilt': 'system built', - 'TimberFrame: AsBuilt': "timber frame unknown insulation", - 'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation', + 'SystemBuilt: AsBuilt': 'uninsulated system built', + 'TimberFrame: AsBuilt': "uninsulated timber frame", + 'Cavity: AsBuilt (1983-1995)': 'filled cavity', 'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity', - 'SolidBrick: AsBuilt': 'solid brick unknown insulation', + 'SolidBrick: AsBuilt': 'uninsulated solid brick', 'Cavity: FilledCavity': 'filled cavity', 'SolidBrick: Internal': 'insulated solid brick', 'Cavity: External': 'filled cavity', - 'Sandstone: Internal': 'sandstone or limestone', - 'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation', - 'System build': 'system built', + 'Sandstone: Internal': 'insulated sandstone or limestone', + 'Cavity: AsBuilt (Pre 1976)': 'uninsulated cavity', + 'System build': 'system built unknown insulation', 'Solid brick': 'solid brick unknown insulation', - 'Stone': 'sandstone or limestone', + 'Stone': 'sandstone or limestone unknown insulation', 'Timber frame': 'timber frame unknown insulation', '2017 onwards': 'new 
build - average thermal transmittance', 'ND (inferred)': 'unknown', - 'Flat / maisonette': 'other', - 'Other': 'other', + 'Flat / maisonette': 'unknown', + 'Other': 'unknown', 'Timber Frame': 'timber frame unknown insulation', 'Cavity Wall': 'cavity unknown insulation', - 'Non-Traditional': 'system built', - 'PRC': 'system built', - 'Cross Wall': 'system built', + 'Non-Traditional': 'system built unknown insulation', + 'PRC': 'system built unknown insulation', + 'Cross Wall': 'system built unknown insulation', 'Solid Wall': 'solid brick unknown insulation', 'Traditional': 'unknown', 'Solid': 'solid brick unknown insulation', - 'Wates no fines': 'system built', - 'Concrete Frame': 'system built', - 'PRCWATES': 'system built', - 'Refurbished Cornish': 'system built', + 'Wates no fines': 'system built unknown insulation', + 'Concrete Frame': 'system built unknown insulation', + 'PRCWATES': 'system built unknown insulation', + 'Refurbished Cornish': 'system built unknown insulation', 'Bailey Stratton': 'other', - 'Refurbished Reema': 'system built', - 'PRCREEMA': 'system built', - 'Trustsell Type': 'system built', + 'Refurbished Reema': 'system built unknown insulation', + 'PRCREEMA': 'system built unknown insulation', + 'Trustsell Type': 'system built unknown insulation', 'Petra Nissan': 'unknown', - 'Reinstated Airey': 'system built', - 'Refurbished Airey': 'system built', + 'Reinstated Airey': 'system built unknown insulation', + 'Refurbished Airey': 'system built unknown insulation', # From Abri- slightly unclear on types but not a large portion of the data - 'No Fines Type': 'system built', - 'Refurbished Unity': 'system built', + 'No Fines Type': 'system built unknown insulation', + 'Refurbished Unity': 'system built unknown insulation', 'Timber Framed': 'timber frame unknown insulation', - 'Refurbished Woolaway': 'system built', + 'Refurbished Woolaway': 'system built unknown insulation', 'Modern Methods of Construction': 'other', - 'BISF - Brit Iron & Steel 
Federation': 'system built', - 'Steel Framed': 'system built', + 'BISF - Brit Iron & Steel Federation': 'system built unknown insulation', + 'Steel Framed': 'system built unknown insulation', 'Timber Framed with confirmed Fire Stopping': 'timber frame unknown insulation', - 'Sipporex': 'system built', + 'Sipporex': 'system built unknown insulation', - 'Wates': 'system built', - 'Bryants': 'system built', - 'Gregory (Crosswall)': 'system built', - 'Rsmit': 'system built', - 'Dorman Long': 'system built', - 'Tarmac': 'system built', - 'RBIS': 'system built', - 'Five Oaks': 'system built', + 'Wates': 'system built unknown insulation', + 'Bryants': 'system built unknown insulation', + 'Gregory (Crosswall)': 'system built unknown insulation', + 'Rsmit': 'system built unknown insulation', + 'Dorman Long': 'system built unknown insulation', + 'Tarmac': 'system built unknown insulation', + 'RBIS': 'system built unknown insulation', + 'Five Oaks': 'system built unknown insulation', 'Not known': 'unknown', - 'Smiths': 'system built', - 'Kendrick': 'system built', - 'IDC': 'system built', - 'Wimpey (Part Brick)': 'system built', - 'Whitehall': 'system built', - 'Wimpey': 'system built', - 'Bison': 'system built', - 'Zinns': 'system built', - 'Bisf': 'system built', - 'Integer': 'system built', - 'Cornish': 'system built', - 'Rwate': 'system built', - 'Hill Presweld Steel': 'system built', + 'Smiths': 'system built unknown insulation', + 'Kendrick': 'system built unknown insulation', + 'IDC': 'system built unknown insulation', + 'Wimpey (Part Brick)': 'system built unknown insulation', + 'Whitehall': 'system built unknown insulation', + 'Wimpey': 'system built unknown insulation', + 'Bison': 'system built unknown insulation', + 'Zinns': 'system built unknown insulation', + 'Bisf': 'system built unknown insulation', + 'Integer': 'system built unknown insulation', + 'Cornish': 'system built unknown insulation', + 'Rwate': 'system built unknown insulation', + 'Hill Presweld 
Steel': 'system built unknown insulation', 'Cavity Filled Cavity': 'filled cavity', 'Cavity Unknown': 'cavity unknown insulation', 'Cavity Filled Cavity (internal)': 'filled cavity', '': 'unknown', 'Cavity Internal Insulation': 'filled cavity', 'Cavity As Built': "uninsulated cavity", - 'Non Trad Large Panel System': 'system built', - 'Non Trad Cornish': 'system built', - 'Non Trad Reema': 'system built', + 'Non Trad Large Panel System': 'system built unknown insulation', + 'Non Trad Cornish': 'system built unknown insulation', + 'Non Trad Reema': 'system built unknown insulation', 'Traditional Cavity Brickwork': 'cavity unknown insulation', - 'System build (undefined)': 'system built', - 'Non Trad Wimpey': 'system built', - 'Non Trad Wates': 'system built', - + 'System build (undefined)': 'system built unknown insulation', + 'Non Trad Wimpey': 'system built unknown insulation', + 'Non Trad Wates': 'system built unknown insulation', 'CAVITY FILLED 270MM': 'filled cavity', 'CAVITY FILLED 270MM': 'filled cavity', 'CAVITY FILLED 250MM': 'filled cavity', @@ -238,17 +278,60 @@ WALL_CONSTRUCTION_MAPPINGS = { 'CAVITY A/B 270MM': "uninsulated cavity", 'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation', 'CAVITY EWI': 'filled cavity', - 'SANDSTONE/CAVITY EXT': 'sandstone or limestone', - 'SYSTEM BUILD 100MM EWI': 'system built', + 'SANDSTONE/CAVITY EXT': 'sandstone or limestone unknown insulation', + 'SYSTEM BUILD 100MM EWI': 'insulated system built', 'CAVITY A/B 260MM': "uninsulated cavity", 'CAVITY A/B 270MM': "uninsulated cavity", 'CAVITY A/B 250MM': "uninsulated cavity", - 'System': 'system built', - 'Sandstone/Limestone': 'sandstone or limestone', - 'No Fines': 'system built', - 'Granite/Whinstone': 'granite or whinstone', + 'System': 'system built unknown insulation', + 'Sandstone/Limestone': 'sandstone or limestone unknown insulation', + 'No Fines': 'system built unknown insulation', + 'Granite/Whinstone': 'granite or whinstone unknown insulation', 'Not 
applicable to this asset type': 'unknown', - 'Steel Frame': 'system built', + 'Steel Frame': 'system built unknown insulation', 'Solid Wall As Built': 'uninsulated solid brick', - 'Solid As Built': 'uninsulated solid brick' + 'Solid As Built': 'uninsulated solid brick', + 'Cavity: FilledCavity, Cavity: Unknown': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), TimberFrame: Unknown': 'uninsulated cavity', + 'SolidBrick: AsBuilt, SolidBrick: Unknown': 'uninsulated solid brick', + 'Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), SolidBrick: Unknown': 'uninsulated cavity', + 'Cavity: FilledCavity, TimberFrame: Unknown': 'filled cavity', + 'Cavity: AsBuilt (1976-1982), Cavity: Unknown': 'uninsulated cavity', + 'Cavity: Unknown, SolidBrick: AsBuilt': 'cavity unknown insulation', + 'Cavity: AsBuilt (1976-1982), Cavity: FilledCavity': 'filled cavity', + 'Cavity: External, Cavity: FilledCavity': 'filled cavity', + 'Cavity: AsBuilt (Post 1995), TimberFrame: AsBuilt': 'filled cavity', + 'TimberFrame: AsBuilt, TimberFrame: Internal': 'timber frame unknown insulation', + 'GraniteOrWhinstone: AsBuilt': 'uninsulated granite or whinstone', + 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), Cavity: FilledCavity': 'filled cavity', + 'SolidBrick: AsBuilt, SolidBrick: External': 'insulated solid brick', + 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity': 'filled cavity', + 'Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity', + 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), SolidBrick: AsBuilt': 'uninsulated cavity', + 'Cavity: AsBuilt (1976-1982), SolidBrick: AsBuilt': 'filled cavity', + + 'Cavity: FilledCavity, SolidBrick: AsBuilt': 'filled cavity', + 'SolidBrick: External': 'insulated solid brick', + 'Cavity: FilledCavity, Cavity: Internal': 'filled cavity', + 'Cavity: External, 
SolidBrick: AsBuilt': 'filled cavity', + 'SolidBrick: AsBuilt, TimberFrame: AsBuilt': 'uninsulated solid brick', + 'Cavity: FilledCavity, SystemBuilt: AsBuilt': 'filled cavity', + 'Cavity: AsBuilt (1976-1982), SystemBuilt: AsBuilt': 'system built', + 'Cavity: AsBuilt (Post 1995), SolidBrick: AsBuilt': 'filled cavity', + 'Cavity: AsBuilt (1983-1995), TimberFrame: AsBuilt': 'filled cavity', + 'SystemBuilt: AsBuilt, TimberFrame: AsBuilt': 'uninsulated system built', + 'TimberFrame: Internal': 'insulated timber frame', + 'Cavity: Internal': 'filled cavity', + 'SystemBuilt: External': 'filled cavity', + 'Cavity: AsBuilt (Pre 1976), SystemBuilt: AsBuilt': 'uninsulated cavity', + 'SystemBuilt: Internal': 'insulated system built', + 'Cavity: AsBuilt (1983-1995), SolidBrick: AsBuilt': 'solid brick unknown insulation', + 'Cavity: AsBuilt (Pre 1976), TimberFrame: AsBuilt': 'timber frame unknown insulation', + 'SolidBrick: AsBuilt, SolidBrick: Internal': 'uninsulated solid brick', + 'Cavity: FilledCavity, TimberFrame: AsBuilt': 'filled cavity', + 'Cavity: FilledCavity, SolidBrick: AsBuilt, SolidBrick: Internal': 'filled cavity', + 'Cavity: Internal, SolidBrick: AsBuilt': 'filled cavity', } diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt index 99943397..b68706be 100644 --- a/asset_list/requirements.txt +++ b/asset_list/requirements.txt @@ -6,7 +6,10 @@ epc-api-python==1.0.2 thefuzz boto3 openpyxl -openai +openai>=1.3.5 tiktoken msgpack -beautifulsoup4 \ No newline at end of file +beautifulsoup4 +pydantic>=1.10.7 +typing-extensions>=4.5.0 +requests>=2.28.2 diff --git a/asset_list/utils.py b/asset_list/utils.py index 1678b8e9..fe2b7d14 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -1,5 +1,5 @@ import time -import numpy as np +import random import pandas as pd from backend.SearchEpc import SearchEpc from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc @@ -37,7 +37,9 @@ def get_data( "mid-terrace": "Mid-Terrace", "end-terrace": 
"End-Terrace", "semi-detached": "Semi-Detached", - "detached": "Detached" + "detached": "Detached", + "enclosed end-terrace": "End-Terrace", + "enclosed mid-terrace": "Mid-Terrace", } epc_data = [] @@ -101,7 +103,6 @@ def get_data( else: # Try splitting on space add1 = full_address.split(" ")[0].strip() - else: add1 = str(house_number) searcher = SearchEpc( @@ -172,7 +173,7 @@ def get_data( find_epc_data = {} except Exception as e: raise Exception(f"Error retrieving FindMyEPC data: {e}") - time.sleep(np.random.uniform(0.1, 1)) + time.sleep(random.sample(range(50, 100), 1)[0] / 100) epc = { row_id_name: home[row_id_name], @@ -182,6 +183,11 @@ def get_data( } epc_data.append(epc) + + if len(epc_data) % 50 == 0 and len(epc_data) > 0: + logger.info("Sleeping for 10 seconds to avoid hitting API rate limit") + time.sleep(10) + except Exception as e: errors.append(home[row_id_name]) time.sleep(5) diff --git a/backend/Property.py b/backend/Property.py index 91c1265a..22eb2fc3 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -217,6 +217,9 @@ class Property: self.eco4_eligibility = None self.whlg_eligibility = None + # Ventilation + self.has_ventilation = self.identify_ventilation() + @classmethod def extract_kwargs(cls, kwargs): """ @@ -1197,7 +1200,7 @@ class Property: self.heating_energy_source = self.heating_energy_source[0] if self.heating_energy_source == "Varied (Community Scheme)": - if self.main_fuel["fuel_type"] == "mains gas": + if self.main_fuel["fuel_type"] in ["mains gas", None]: # We assume when None as it's unknown self.heating_energy_source = "Natural Gas (Community Scheme)" else: raise Exception("Implement me") @@ -1233,6 +1236,13 @@ class Property: if "air_source_heat_pump" not in measures: return False + # If we have a house over a floor area threshold, we recommend an ASHP + if ( + self.data["property-type"] in ["House", "Bungalow"] and + self.floor_area > assumptions.ASHP_FLOOR_AREA_THRESHOLD + ): + return True + suitable_house = 
self.data["property-type"] == "House" and self.data["built-form"] in [ "Detached", "Semi-Detached", "End-Terrace", ] @@ -1342,3 +1352,12 @@ class Property: self.gbis_eligibiltiy = funding_calulator.gbis_eligibiltiy self.eco4_eligibility = funding_calulator.eco4_eligibility self.whlg_eligibility = funding_calulator.whlg_eligibility + + def identify_ventilation(self): + + ventilation_descriptions = [ + 'mechanical, extract only', + 'mechanical, supply and extract' + ] + + return self.data["mechanical-ventilation"] in ventilation_descriptions diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index f1090ef3..d36266d3 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -58,6 +58,19 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1}, "Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85}, "From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85}, + "Room heaters, coal": {"fuel": "Coal", "cop": 0.85}, + "Electric underfloor heating, Electric storage heaters": {"fuel": "Electricity", "cop": 1}, + 'Room heaters, electric, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85}, + 'Boiler and radiators, mains gas, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85}, + 'Room heaters, electric, Electric storage heaters': {"fuel": "Electricity", "cop": 1}, + "Boiler and radiators, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85}, + "Boiler and radiators, anthracite": {"fuel": "Anthracite", "cop": 0.85}, + 'Electric immersion, off-peak, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1}, + 'Ground source heat pump, radiators, electric': { + "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100 + }, + 'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1}, + "Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1}, } # 
These are the measure types where if there is a ventilation recommendation, we force the inclusion of it @@ -65,3 +78,6 @@ DESCRIPTIONS_TO_FUEL_TYPES = { measures_needing_ventilation = [ "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation" ] + +# If we have a property beyond this size, we assume it's likely large enough to have an ASHP +ASHP_FLOOR_AREA_THRESHOLD = 120 # m2 diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 48300f2a..6b8b192d 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -96,3 +96,7 @@ class PlanTriggerRequest(BaseModel): # When performing a remote assessment, if this has been set, it will allow the engine to # pull data from the find my epc website, to utilise as part of a remote assessment event_type: Optional[Literal["remote_assessment"]] = None + + # If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing + # scores to drop by a few points + simulate_sap_10: Optional[bool] = False diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 5316fd03..d631e349 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -30,7 +30,6 @@ import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi from backend.Property import Property -from backend.Funding import Funding from backend.apis.GoogleSolarApi import GoogleSolarApi from recommendations.optimiser.CostOptimiser import CostOptimiser @@ -507,7 +506,7 @@ async def model_engine(body: PlanTriggerRequest): ) # if we have a remote assment data type, we pull the additional data and include it - if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]): + if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): logger.info("Retrieving find my epc data") try: property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( @@ 
-728,7 +727,8 @@ async def model_engine(body: PlanTriggerRequest): # Additionally, if we have required measures, they should also be included. Therefore # we can discount the number of points required to get to the target SAP band (or increase) # in the case of ventilation - needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation) + needs_ventilation = any( + x in property_measure_types for x in assumptions.measures_needing_ventilation) and not p.has_ventilation input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation) @@ -772,6 +772,10 @@ async def model_engine(body: PlanTriggerRequest): epc_to_sap_lower_bound(body.goal_value) - current_sap_points ) - fixed_gain + if body.simulate_sap_10: + # We add 3 additional SAP points to the required gain to account for SAP 10 + sap_gain += 3 + if not body.optimise: if body.goal != "Increasing EPC": raise NotImplementedError("Only EPC optimisation is currently supported") @@ -826,7 +830,11 @@ async def model_engine(body: PlanTriggerRequest): ) # If wall insulation is selected, we also include mechanical ventilation as a best practice measure - if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation): + ventilation_selected = [ + r for r in solution if "+mechanical_ventilation" in r["type"] + ] + if (any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation) or + len(ventilation_selected)): ventilation_rec = next( (r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"), None diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index b22837d8..4291b1d1 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -28,8 +28,8 @@ class AnnualBillSavings: # Latest price cap figures from Ofgem are for April 2024 # https://www.ofgem.gov.uk/energy-price-cap - ELECTRICITY_PRICE_CAP = 
0.2486 - GAS_PRICE_CAP = 0.0634 + ELECTRICITY_PRICE_CAP = 0.2573 + GAS_PRICE_CAP = 0.0633 # This is the most recent export payment figure, at 9.28p/kWh # Smart export guarantee rates can be found here: # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates @@ -39,8 +39,8 @@ class AnnualBillSavings: PRICE_FACTOR = 0.09549999999999999 # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT - DAILY_STANDARD_CHARGE_GAS = 0.3165 - DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097 + DAILY_STANDARD_CHARGE_GAS = 0.2982 + DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137 # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison # For July 2024. These quotes are based on the east midlands region, so we diff --git a/etl/customers/Brentwood/compile_new_asset_list.py b/etl/customers/Brentwood/compile_new_asset_list.py new file mode 100644 index 00000000..e3ced5ab --- /dev/null +++ b/etl/customers/Brentwood/compile_new_asset_list.py @@ -0,0 +1,38 @@ +""" +Brentwood sent us a new asset list in July 2025. 
This script will combine the data in the new asset list with the +old, so we have a single picture +""" + +import pandas as pd + +new_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/All Assets " + "29.05.2025.xlsx", + sheet_name="Sheet1", + header=1 +) + +old_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/BRENTWOOD Asset " + "list.xlsx", + sheet_name="Asset List" +) + +# We combine based on the data we want +compiled = new_asset_list.merge( + old_asset_list[["UPRN", "Asset Type", "Year Built", "Dwelling", "Bedrooms", "Ownership", 'Asbestos Full Survey', + 'Stock Condition Survey', 'Cat', 'Heating', + 'WFT Findings', 'ECO Eligibility', 'CIGA Requested', 'CIGA Guarantee', + 'ECO Survey completed']], + how="left", + on="UPRN" +) + +compiled["WFT Findings"] = compiled["WFT Findings"].fillna("Not Inspected") + +# Store this data +compiled.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/20250710 Asset List " + "Brentwood.xlsx", + index=False +) diff --git a/etl/customers/Colchester/July 2025 Finalised Route.py b/etl/customers/Colchester/July 2025 Finalised Route.py new file mode 100644 index 00000000..f3ecf2d9 --- /dev/null +++ b/etl/customers/Colchester/July 2025 Finalised Route.py @@ -0,0 +1,54 @@ +import pandas as pd + +comments_df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/CBH_RetroTeamList_amended_25-06-05.xlsx", +) + +cavity_route = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/20250708 Colchester Borough Homes- Standardised.xlsx", + sheet_name="July 2025 Route - Cavity" +) + +solar_route = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/20250708 Colchester Borough Homes- 
Standardised.xlsx", + sheet_name="July 2025 Route - Solar" +) + +# Merge on the comments +comments = comments_df[ + ["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)'] +].copy() + +cavity_route = cavity_route.merge( + comments, left_on="landlord_property_id", right_on="URPN", how="left" +) +solar_route = solar_route.merge( + comments, left_on="landlord_property_id", right_on="URPN", how="left" +) + +# Get properties that are not on either route +not_on_routes = comments_df[ + ~comments_df["URPN"].isin(cavity_route["landlord_property_id"]) & + ~comments_df["URPN"].isin(solar_route["landlord_property_id"]) + ] + +# Store +not_on_routes.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/Properties not on routes.xlsx", + index=False +) +# Save the routes +cavity_route.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/Cavity Route.xlsx", + index=False +) +solar_route.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/Solar Route.xlsx", + index=False +) diff --git a/etl/customers/acis/solid_wall_funding.py b/etl/customers/acis/solid_wall_funding.py new file mode 100644 index 00000000..5515b29c --- /dev/null +++ b/etl/customers/acis/solid_wall_funding.py @@ -0,0 +1,144 @@ +import os +import pandas as pd +import numpy as np +from dotenv import load_dotenv +from etl.find_my_epc.AssetListEpcData import AssetListEpcData +from backend.Funding import Funding +from backend.app.utils import sap_to_epc +from recommendations.recommendation_utils import estimate_external_wall_area + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) +pps_matrix = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores 
Matrix v5.xlsx", + header=1 +) +pps_matrix.columns = [c.strip() for c in pps_matrix.columns] + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties - Standardised_2.xlsx", + sheet_name="Standardised Asset List" +) + +asset_list = asset_list.rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode"} +) +asset_list["address"] = asset_list["address"].astype(str) + +# Pull the find my EPC data and get the SAP points for solid wall +asset_list_epc_client = AssetListEpcData( + asset_list=asset_list, + epc_auth_token=EPC_AUTH_TOKEN +) +asset_list_epc_client.get_data() +asset_list_epc_client.get_non_invasive_recommendations() +# We pull out solid wall insulation +solid_wall_sap_points = [] +for r in asset_list_epc_client.non_invasive_recommendations: + solid_recommendations = [ + x for x in r["recommendations"] if ("internal_wall_insulation" in x["type"]) or ( + "external_wall_insulation" in x["type"] + ) + ] + if solid_recommendations: + solid_recommendations = solid_recommendations[0] + else: + continue + + address = r["address"] + postcode = r["postcode"] + + solid_wall_sap_points.append( + { + "address": address, + "postcode": postcode, + "sap_points": solid_recommendations["sap_points"] + } + ) + +solid_wall_sap_points = pd.DataFrame(solid_wall_sap_points) +avg_points = solid_wall_sap_points["sap_points"].median() + +asset_list = asset_list.merge(solid_wall_sap_points, how="left", on=["address", "postcode"]) +asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_points) +asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"] +asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x)) +asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x)) +asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x)) 
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x)) + +asset_list["funding_scheme"] = np.where( + ( + (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]) + ), + "GBIS", + "ECO4" +) + +# Merge on the ABS matrix +asset_list = asset_list.merge( + abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"], + right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ] +) +asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment']) + +# store for backup +# asset_list.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties - +# Standardised_2_with_funding.csv", +# index=False +# ) + +# For GBIS, we use the PPS +# Almost all properties are gas + +# Using IWI solid 1.7 -> 0.3 rates +pps_matrix = pps_matrix[ + pps_matrix["Measure_Type"].isin(["IWI_solid_1.7_0.3"]) +] + +# Merge on +asset_list = asset_list.merge( + pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename( + columns={ + "Cost Savings": "partial_project_score", + "Starting Band": "starting_half_band", + "Total Floor Area Band": "floor_area_band" + } + ), + how="left", + on=["starting_half_band", "floor_area_band"], +) +asset_list["partial_project_score"] = np.where( + asset_list["starting_half_band"].isin(["Low_C", "High_C"]), + None, + asset_list["partial_project_score"] +) + +asset_list["funding_abs"] = np.where( + asset_list["funding_scheme"] == "GBIS", + asset_list["partial_project_score"], + asset_list["Cost Savings"] +) + +asset_list["heat_loss_area"] = asset_list.apply( + lambda x: estimate_external_wall_area( + num_floors=x["attribute_est_number_floors"], + floor_height=( + float(x["epc_floor_height"]) if + not pd.isnull(x["epc_floor_height"]) else 2.5 + ), + perimeter=x["attribute_est_perimter"], + built_form=x["epc_archetype"] + ), + axis=1 +) + +filename = 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/20250624 ACIS solid wall - standardised.xlsx" + +with pd.ExcelWriter(filename) as writer: + asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) diff --git a/etl/customers/blakeridge_mill/data.py b/etl/customers/blakeridge_mill/data.py new file mode 100644 index 00000000..c9d7f9e6 --- /dev/null +++ b/etl/customers/blakeridge_mill/data.py @@ -0,0 +1,49 @@ +# Get units for postcodes WF17 8RA, WF17 8RB +import os + +import pandas as pd +from epc_api.client import EpcClient +from dotenv import load_dotenv + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +postcodes = [ + "WF17 8RA", + "WF17 8RB", +] + +client = EpcClient(auth_token=EPC_AUTH_TOKEN) + +data = [] +for postcode in postcodes: + resp = client.domestic.search( + params={"postcode": postcode, "address": None, "local-authority": None, "property-type": None, + "floor-area": None, + "energy-band": None, "from-month": None, "from-year": None, "to-month": None, "to-year": None, + 'constituency': None}, + size=1000 + ) + data.extend(resp["rows"]) + +df = pd.DataFrame(data) +# Get newest field by UPRN, inspection-date +df["inspection-date"] = pd.to_datetime(df["inspection-date"]) +df = df.sort_values(by=["uprn", "inspection-date"], ascending=[True, False]) +df = df.drop_duplicates(subset=["uprn"], keep="first") + +df.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Blakeridge Mill/blakeridge_mill_epc_data.xlsx", index=False +) + +df = df[df["address"] != "The Tower Blakeridge Mill, Upper Blakeridge Lane"] +df["walls-description"].value_counts() +df["roof-description"].value_counts() + +df["total-floor-area"].astype(float).mean() +df["current-energy-efficiency"] = pd.to_numeric(df["current-energy-efficiency"], errors='coerce') + +df.groupby("transaction-type")["current-energy-efficiency"].mean() +df["transaction-type"].value_counts() + +df[df["transaction-type"] == 
"rental"]["built-form"].value_counts() diff --git a/etl/customers/bromford/solar_pv_cleanup.py b/etl/customers/bromford/solar_pv_cleanup.py new file mode 100644 index 00000000..c2c541da --- /dev/null +++ b/etl/customers/bromford/solar_pv_cleanup.py @@ -0,0 +1,289 @@ +import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc +import numpy as np + +contact_list = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar " + "PV address list - second wave KLD - PP.csv" +) +contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number( + address=str(x["Address 1: Street 1"]).strip(), + postcode=str(x["Postal Code"]).strip(), +), axis=1) + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - " + "Standardised (1).xlsx", + sheet_name="Standardised Asset List" +) + +lookup = [] +missed = [] +for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)): + + if x["Address 1: Street 1"] == '1 The Beck': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 40692, + } + ) + continue + + if x["Address 1: Street 1"] == '3 The Beck ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 40693, + } + ) + continue + + if x["Address 1: Street 1"] == '2 Orchard Close ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 7924, + } + ) + continue + + if x["Address 1: Street 1"] == '2 Orchard Close ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 7924, + } + ) + continue + + if x["Address 1: Street 1"] == '3 Croxall Road': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 40650, + } + ) + continue + + if x["Address 1: Street 1"] == '4 Ward Road ': + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": 33175, + } + ) + continue + + df = asset_list[ + 
asset_list["domna_full_address"].str.replace(",", "").str.contains(x["Address 1: Street 1"].strip()) & + asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) + ] + # Fallback 1: exact (comma-stripped) address match. The comparison is parenthesised because `&` binds tighter than `==`. + if df.shape[0] != 1: + df = asset_list[ + (asset_list["domna_full_address"].str.replace(",", "") == x["Address 1: Street 1"].strip()) & + asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) + ] + # Fallback 2: match on the extracted house number within the same postcode + if df.shape[0] != 1: + df = asset_list[ + (asset_list["domna_address_1"].astype(str) == str(x["house_no"])) & + (asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) == True) + ] + # Still ambiguous or unmatched: record the UPRN so it can be completed by hand + if df.shape[0] != 1: + missed.append(x["UPRN"]) + continue + + lookup.append( + { + "UPRN": x["UPRN"], + "landlord_property_id": df["landlord_property_id"].values[0], + } + ) + +lookup = pd.DataFrame(lookup) + +contact_list = contact_list.merge(lookup, how="left", on="UPRN") +# Store +contact_list.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar " + "PV address list - second wave KLD - PP with landlord_property_id.csv", + index=False +) + +# I manually completed the lookup for the missed ones.
We now read it back in and pull in the properties for the +# standardised asset list +contacts_complete = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar " + "PV address list - second wave KLD - PP with landlord_property_id.csv" +) + +new_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet " + "Solar PV installs.xlsx", + sheet_name="Sheet1" +) + +contact_list = contact_list.merge( + new_data, + how="left", + left_on="UPRN", + right_on="CE UPRN" +) +route = asset_list[ + asset_list["landlord_property_id"].isin(contact_list["Legacy UPRN"].astype("Int64").astype(str)) +].copy() + +# Add the new heating data +contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str) +route2 = contact_list.merge( + route, + how="left", + right_on="landlord_property_id", + left_on="Legacy UPRN" +) + +# Because I did a data pull, we can fill the other bits of information +missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"].astype(int).astype(str))] +# "Legacy UPRN" is held as a string (cast above), so the route ids are normalised to strings before comparing +# Store both the route and missed +route2.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv", + index=False +) + +# Add on phone number +contact_details_filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme " + "Hubspot Upload/Hubspot/Bromford - Solar PV address list - second wave KLD - PP with " + "landlord_property_id.xlsx") + +contacts_filenames = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact " + "details/FAO Paul Contact Details-Table 1.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact " + "details/Green Contact Details-Table 1.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar
Programme Hubspot Upload/contact " + "details/Main Contact Details-Table 1.csv", +] + +merge_to = pd.read_excel(contact_details_filepath) + +lookup = [] +for fn in contacts_filenames: + df = pd.read_csv(fn, encoding="utf-8-sig") + # Merge on phone + details = df[ + df["Property Reference Number (Main Address) (Property)"].isin(merge_to["UPRN"].astype(str)) + ][[ + "Property Reference Number (Main Address) (Property)", "Landline", "Mobile Phone", "Email Address", + "First Name", "Last Name" + ]] + + lookup.append(details) + +lookup = pd.concat(lookup) + +# Drop entries where landline, mobile and email are all NaN +lookup = lookup.dropna(subset=["Landline", "Mobile Phone", "Email Address"], how="all") +lookup = lookup.drop_duplicates(["Landline", "Mobile Phone", "Email Address"]) +# Sort so email is first, then landline, then mobile +lookup = lookup.sort_values( + ["Property Reference Number (Main Address) (Property)", "Email Address", "Landline", "Mobile Phone"], + ascending=[True, True, True, True] +) + +# Store +lookup.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/contact " + "details.csv", + index=False +) + +lookup2 = [] +for _, x in lookup.groupby("Property Reference Number (Main Address) (Property)"): + + # If any entries have an email, we take those + if x["Email Address"].notna().any(): + x = x[x["Email Address"].notna()] + # We then take the entry with a phone number + if x["Landline"].notna().any() or x["Mobile Phone"].notna().any(): + x = x[x["Landline"].notna() | x["Mobile Phone"].notna()] + + # Take the first entry + x = x.iloc[0] + lookup2.append(x) + +lookup2 = pd.DataFrame(lookup2) + +import pandas as pd + +# Contact columns the grouping below relies on +columns = ['Property Reference Number (Main Address) (Property)', 'Landline', 'Mobile Phone', 'Email Address'] + +# `lookup` is the real contact table assembled above and is reused as-is here. +# The pasted example re-created it as an empty placeholder frame at this point, which made the
+# grouping loop below (and every later use of `lookup`) run over zero rows. + +# Grouping and transforming +results = [] + +for prop_id, group in lookup.groupby("Property Reference Number (Main Address) (Property)"): + # Keep rows that have an email AND at least one phone number + filtered = group[ + group["Email Address"].notna() & + (group["Landline"].notna() | group["Mobile Phone"].notna()) + ].copy() + + if filtered.empty: + continue + + # Sort by presence of phone numbers (prioritize those with both) + filtered["contact_score"] = ( + filtered["Landline"].notna().astype(int) + + filtered["Mobile Phone"].notna().astype(int) + ) + filtered = filtered.sort_values("contact_score", ascending=False) + + primary = filtered.iloc[0] + # Make sure secondary is not the same as primary + if not pd.isnull(primary["Mobile Phone"]): + secondary = filtered[ + (filtered["Mobile Phone"] != primary["Mobile Phone"]) + ] + elif not pd.isnull(primary["Landline"]): + secondary = filtered[ + (filtered["Landline"] != primary["Landline"]) + ] + else: + raise Exception("Look at me") + # Pick the best-scored remaining candidate; previously this re-read `filtered` and discarded the de-duplication above + secondary = secondary.iloc[0] if len(secondary) > 0 else None + + results.append({ + "Property ID": prop_id, + "Primary Email": primary["Email Address"], + "Primary Phone": primary["Mobile Phone"] if pd.notna(primary["Mobile Phone"]) else primary["Landline"], # NaN is truthy, so `or` never fell back + "Secondary Email": secondary["Email Address"] if secondary is not None else None, + "Secondary Phone": (secondary["Mobile Phone"] if pd.notna(secondary["Mobile Phone"]) else secondary["Landline"]) if secondary is not None else None, + }) + +final_df = pd.DataFrame(results) + +# NOTE(review): ace_tools appears to be a ChatGPT-sandbox helper and is assumed not importable here; print with pandas instead + +print(final_df.to_string(index=False)) + +# We set up primary and secondary phone numbers.
We use mobile as the primary + + +# We have duplicates, we prioritise entries, by ID, that have a email +lookup2 = lookup.sort_values("Property Reference Number (Main Address) (Property)").drop_duplicates( + "Property Reference Number (Main Address) (Property)", keep="last" +) + +# TODO: Get into the standardised asset list format +# TODO: Add the deal postcode to Hubspot +# TODO: Upload the deal postcode diff --git a/etl/customers/ealing/fixing houses asset list.py b/etl/customers/ealing/fixing houses asset list.py new file mode 100644 index 00000000..4a39428a --- /dev/null +++ b/etl/customers/ealing/fixing houses asset list.py @@ -0,0 +1,45 @@ +import pandas as pd + +houses_list = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(UNCHECKED).csv" +) + +features = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(IGNORE - FULL MAIN).csv" +) +features = features.drop( + columns=[ + 'Archetype', 'Construction', 'Insulated', 'Material', + 'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES', + 'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'Surveyors Name', + 'Unnamed: 30', 'Unnamed: 31' + ] +) + +demolitions = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing - Demolished or due to be.xlsx", + sheet_name="Demolished or due to be" +) + +inspections_data = houses_list[ + [ + "Property ref", "Postcode", 'Archetype', 'Construction', 'Insulated', 'Material', + 'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES', + 'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'YET TO BE SURVEYED' + ] +].rename(columns={"YET TO BE SURVEYED": "Surveyors Name"}) + +asset_list = features.drop( + columns=[ + 'Archetype', 'Construction', 'Insulated', 'Material', 'CIGA Check Required', + 'PV, ACCESS ISSUE, SEE NOTES', 'OFF GAS - ROOF ORIENTATION', + 'Any further surveyor notes', 'Surveyors Name', "Postcode" + ] +).merge( + 
inspections_data, + how="inner", + on="Property ref", +) + +asset_list.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing_rechecked_cleaned_05042025.csv", + index=False) diff --git a/etl/customers/ealing/prepare_for_hubspot.py b/etl/customers/ealing/prepare_for_hubspot.py new file mode 100644 index 00000000..8cffda57 --- /dev/null +++ b/etl/customers/ealing/prepare_for_hubspot.py @@ -0,0 +1,75 @@ +import numpy as np +import pandas as pd +from asset_list.hubspot.config import HubspotProcessStatus + +project_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/Ealing Flats Completion Tracker JW " + "170625.xlsx", + sheet_name="All_Flats" +) + +project_data["hubspot_status"] = None +project_data["hubspot_status"] = np.where( + (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2023"), + HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + project_data["hubspot_status"] +) +project_data["hubspot_status"] = np.where( + (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2019"), + "SURVEYED UNDER 2019 - NEEDS RE-SURVEY", + project_data["hubspot_status"] +) +project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str) + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Standardised Asset List" +) +asset_list["landlord_property_id"] = asset_list["landlord_property_id"].astype(str) +asset_list["incorrect_landlord_property_id"] = asset_list["incorrect_landlord_property_id"].astype(str) +project_data["Property ref"] = project_data["Property ref"].astype(str) + +# We need to update the status of properties that already been surveyed +asset_list2 = asset_list.merge( + project_data[["Property ref", "hubspot_status", "project_code"]], + how="left", + right_on="Property ref", + left_on="incorrect_landlord_property_id", + suffixes=("", 
"_project") +) +asset_list2["hubspot_status"] = np.where( + ~pd.isna(asset_list2["hubspot_status_project"]), + asset_list2["hubspot_status_project"], + asset_list2["hubspot_status"] +) +asset_list2["project_code"] = np.where( + ~pd.isna(asset_list2["project_code_project"]), # prefer the tracker's project code, mirroring hubspot_status above + asset_list2["project_code_project"], + asset_list2["project_code"].fillna(asset_list2["landlord_property_id"]) # otherwise keep the previous fallback chain +) + +asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"]) +asset_list2["cavity_reason"] = np.where( + pd.isnull(asset_list2["cavity_reason"]), + "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68", + asset_list2["cavity_reason"] +) +asset_list2["solar_reason"] = None + +# Read in block analysis and geographical areas from standardised asset list +block_analysis_df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Block Analysis" +) +geographical_areas = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Geographical Areas" +) + +# Update the new standardised asset list +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared " + "programme.xlsx") +with pd.ExcelWriter(filename) as writer: + asset_list2.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) + geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) diff --git a/etl/customers/mhs/new_programme.py b/etl/customers/mhs/new_programme.py new file mode 100644 index 00000000..6f1caafe --- /dev/null +++ b/etl/customers/mhs/new_programme.py @@ -0,0 +1,116 @@ +# +import pandas as pd + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="Standardised
Asset List" +) + +new_cavity_programme = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="New Cavity Programme" +) + +new_cavity_pilot = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="Empty Cavity Pilot" +) + +new_solar_programme = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="New Solar Programme" +) + +in_fill_properties_houses = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 (" + "1).xlsx", + sheet_name="Houses and Bungalows" +) +in_fill_properties_flats = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 (" + "1).xlsx", + sheet_name="Flats and Maistonettes" +) +# Q1) What are these properties? Do we have them on our list already? +# All of the houses are already in the asset list +in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin( + asset_list["landlord_property_id"].values +) +# All of the flats are already in the asset list +in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin( + asset_list["landlord_property_id"].values +) + +# Q2) Which properties are excluded from the new programme? +in_fill_properties = pd.concat( + [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False +) + +# Merge on the data +in_fill_properties = in_fill_properties.merge( + asset_list, + left_on="UPRN", + right_on="landlord_property_id", + how="left" +) +# How many properties are in the new programme? 
+ +in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin( + new_cavity_programme["landlord_property_id"].values +) +in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin( + new_solar_programme["landlord_property_id"].values +) +in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin( + new_cavity_pilot["landlord_property_id"].values +) +not_in_new_programme = in_fill_properties[ + (~in_fill_properties["in_new_cavity_programme"] & ~in_fill_properties["in_new_solar_programme"] & ~ + in_fill_properties["in_new_cavity_pilot"]) +].copy() + +# Why? +not_in_new_programme["cavity_reason"].value_counts() +not_in_new_programme["solar_reason"].value_counts() + +not_identified_for_anything = not_in_new_programme[ + pd.isnull(not_in_new_programme["cavity_reason"]) & + pd.isnull(not_in_new_programme["solar_reason"]) + ] + +# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding +# the extraction +not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin( + [ + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75", + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68", + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more", + "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less", + "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more", + "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less", + "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less", + "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more", + ] +) + +not_in_new_programme["excluded"] = not_in_new_programme["landlord_property_id"].isin( # test every row, so the flag is a real True/False rather than True/NaN + not_identified_for_anything["landlord_property_id"].values +) + +not_in_new_programme[ + not_in_new_programme["funded_extractions"] +].to_csv( +
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv", + index=False +) + +not_in_new_programme[ + not_in_new_programme["excluded"] == True + ].to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv", + index=False +) diff --git a/etl/customers/ncha/portfolio.py b/etl/customers/ncha/portfolio.py new file mode 100644 index 00000000..f47c87c8 --- /dev/null +++ b/etl/customers/ncha/portfolio.py @@ -0,0 +1,14 @@ +import pandas as pd + +cavity = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx", + sheet_name="Cavity Properties (for review)", +) +solar = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx", + sheet_name="Solar Properties", +) + +cavity_al = cavity[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} +) diff --git a/etl/customers/plus dane/prepare_asset_list.py b/etl/customers/plus dane/prepare_asset_list.py new file mode 100644 index 00000000..430c7b5a --- /dev/null +++ b/etl/customers/plus dane/prepare_asset_list.py @@ -0,0 +1,48 @@ +""" +July 2025, this script prepares the asset list for Plus Dane +""" +import pandas as pd + +oldest_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/PLUS DANE Asset List.xlsx" +) +solar_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane - potential " + "PV List 04.03.2025.xlsx" +) +newest_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Sava Intelligent Energy " + "- Property List - March 2025.xlsx" +) + +old_missed = 
oldest_asset_list[~oldest_asset_list["UPRN"].isin(newest_asset_list["UPRN"])].copy() # copy: a column is assigned below +solar_missed = solar_asset_list[~solar_asset_list["UPRN"].isin(newest_asset_list["UPRN"])] # Empty + +# Build new asset list +# NEWEST +# 'UPRN', 'Address', 'Postcode', 'Town', 'EPC SAP Band', 'SAP Rating', +# 'CO₂ Emissions', 'EPC EI Band', 'Data Quality Indicator', +# 'Results Calculated', 'Property Age', 'Property Type', 'Built Form', +# 'Wall Construction', 'Wall Insulation', 'Roof Construction', +# 'Joist Insulation', 'Space Heating System', 'Space Heating Fuel' +# +# SOlAR + +df = newest_asset_list.merge( + solar_asset_list, how="left", on="UPRN", suffixes=("", "_solar"), +).merge( + oldest_asset_list, how="left", on="UPRN", suffixes=("", "_old") +) +df["asset_list_versiion"] = "July 2025" +old_missed["asset_list_versiion"] = "Historic" + +# Append on the old missed? +df = pd.concat( + [df, old_missed], ignore_index=True, sort=False +) +# Store excel +df.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane Asset List " + "July 2025.xlsx", + index=False, +) diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index a8805a71..df4a16fe 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData -PORTFOLIO_ID = 141 +PORTFOLIO_ID = 212 USER_ID = 8 load_dotenv(dotenv_path="backend/.env") @@ -17,25 +17,15 @@ def app(): :return: """ - asset_list = [ - { - "address": "196 Merrow Street", - "postcode": "SE17 2NP", - "uprn": 200003423454, - "patch": True - }, - { - "address": "65 Liverpool Grove", - "postcode": "SE17 2HP", - "uprn": 200003423194 - }, - { - "address": "2 Brettell Street", - "postcode": "SE17 2NZ", - "uprn": 200003423607 - }, - ] - asset_list = pd.DataFrame(asset_list) + asset_list = pd.read_excel( 
+ "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx", + sheet_name="Solar Properties", + ) + asset_list = asset_list[~asset_list["estimated"]] + asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str) + asset_list = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename( + columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} + ) # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" @@ -98,14 +88,15 @@ def app(): "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", "goal": "Increasing EPC", - "goal_value": "C", + "goal_value": "A", "trigger_file_path": filename, "already_installed_file_path": "", "patches_file_path": patches_filename, "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, - "valuation_file_path": valuation_filename, + "valuation_file_path": "", "scenario_name": "Full package remote assessment", "multi_plan": True, "budget": None, + "inclusions": ["cavity_wall_insulation", "ventilation"] } print(body) diff --git a/etl/customers/thrive/Make Insepctions route.py b/etl/customers/thrive/Make Insepctions route.py new file mode 100644 index 00000000..ec4f620b --- /dev/null +++ b/etl/customers/thrive/Make Insepctions route.py @@ -0,0 +1,40 @@ +""" +This script will pull in properties, in neighbouring areas, that have been flagged for CWI +""" +import pandas as pd + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="Standardised Asset List" +) + +cavity_areas = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="Cavity Areas" +) + +existing_inspections_sheet = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 
Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="July 2025 Inspections" +) + +empties = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - " + "reconciled.xlsx", + sheet_name="Cavity properties - for review" +) + +cavity_inspections = asset_list[ + asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values) +] +cavity_inspections = cavity_inspections[ + ~cavity_inspections["landlord_property_id"].isin(empties["landlord_property_id"].values) +] + +cavity_inspections.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv", + index=False +) diff --git a/etl/find_my_epc/AssetListEpcData.py b/etl/find_my_epc/AssetListEpcData.py index f085c8fb..2ff9a3e0 100644 --- a/etl/find_my_epc/AssetListEpcData.py +++ b/etl/find_my_epc/AssetListEpcData.py @@ -1,3 +1,4 @@ +import random import time import pandas as pd from tqdm import tqdm @@ -27,6 +28,7 @@ class AssetListEpcData: self.extracted_data = None self.non_invasive_recommendations = None self.patches = None + self.epc_data = None @staticmethod def check_asset_list(asset_list): @@ -49,7 +51,7 @@ class AssetListEpcData: "uprn": r.get("uprn"), "address": r["address"], "postcode": r["postcode"], - "recommendations": r["recommendations"] + "recommendations": r.get("recommendations") } for r in self.extracted_data ] @@ -74,7 +76,9 @@ class AssetListEpcData: # Pull the additional data extracted_data = [] + epc_data = [] for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)): + add1 = home["address"] pc = home["postcode"] # Retrieve the EPC data @@ -92,9 +96,6 @@ class AssetListEpcData: if epc_searcher.newest_epc is None: continue - if not pd.isnull(home.get("patch")): - epc_searcher.newest_epc["address1"] = add1 - # Attempt both methods: try: find_epc_searcher = RetrieveFindMyEpc( @@ -104,16 +105,37 @@ class AssetListEpcData: find_epc_data = 
find_epc_searcher.retrieve_newest_find_my_epc_data() except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") - find_epc_searcher = RetrieveFindMyEpc( - address=epc_searcher.newest_epc["address1"], - postcode=epc_searcher.newest_epc["postcode"] - ) - find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() - time.sleep(0.5) + if not pd.isnull(home.get("patch")): + epc_searcher.newest_epc["address1"] = add1 + + try: + find_epc_searcher = RetrieveFindMyEpc( + address=epc_searcher.newest_epc["address1"], + postcode=epc_searcher.newest_epc["postcode"] + ) + find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() + except Exception as e: + logger.error(f"Error retrieving find my epc data with alternative address format: {e}") + find_epc_data = { + "current_epc_rating": epc_searcher.newest_epc["current-energy-rating"], + "current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"], + "potential_epc_rating": None, + "potential_epc_efficiency": None, + "epc_data": {} + } + + # Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit + time.sleep(random.sample(range(50, 100), 1)[0] / 100) + + # Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit + if len(extracted_data) % 50 == 0 and len(extracted_data) > 0: + logger.info("Sleeping for 10 seconds to avoid hitting API rate limit") + time.sleep(10) + # We need uprn to_append = { - "uprn": home.get("uprn"), + "uprn": home.get("uprn", epc_searcher.newest_epc["uprn"]), "address": home["address"], "postcode": home["postcode"], **find_epc_data, @@ -128,6 +150,8 @@ class AssetListEpcData: } extracted_data.append(to_append) + epc_data.append(epc_searcher.newest_epc) self.extracted_data = extracted_data + self.epc_data = epc_data logger.info("Data Extrction complete") diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index fad0c78e..50955377 100644 --- 
a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -1,3 +1,4 @@ +import time import re import pandas as pd import requests @@ -55,9 +56,11 @@ class RetrieveFindMyEpc: results = {} # 1. Total floor area - results['total-floor-area'] = int(self.get_text( + # We have some isntances of very old EPCs where the total floor area is not available + tfa = self.get_text( soup.find("dt", string="Total floor area").find_next_sibling("dd") - ).split(" ")[0]) + ).split(" ")[0] + results['total-floor-area'] = int(tfa) if tfa != "Not" else None # Table with features rows = soup.select("table.govuk-table tbody tr") @@ -125,9 +128,156 @@ class RetrieveFindMyEpc: return results - def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): + def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None): + + ratings = soup.find('desc', {'id': 'svg-desc'}).text + current_rating = ratings.split(".")[0] + potential_rating = ratings.split(".")[1] + current_sap = int(current_rating.split(' ')[-1]) + + # Retrieve the energy consumption + bills = soup.find('div', {'id': 'bills-affected'}) + bills_list = bills.find_all('li') + if not bills_list: + # If this is the case, it's usually becaue the EPC was very old. 
Early EPCs did not have this information + heating_text = None + hot_water_text = None + else: + heating_text = bills_list[0].text + hot_water_text = bills_list[1].text + + # Retrieve the recommendations and SAP points + recommendations = [] + recommendations_div = soup.find('div', class_='epb-recommended-improvements') + if recommendations_div: + # Find all h3 headers for each step and extract their related information + step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m') + previous_sap_score = current_sap + previous_epc = current_rating.split(' ')[-6] + for step_num, step_header in enumerate(step_headers, start=1): + # Extract the step title (the measure) + measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "") + + # Find the div containing the potential rating within the same section + potential_rating_div = step_header.find_next( + 'div', class_='epb-recommended-improvements__potential-rating' + ) + + # Check if the potential rating div is found + if potential_rating_div: + # Extract the rating text within the SVG text element + extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold') + if extracted_rating_text is not None: + rating_text = extracted_rating_text.text.strip() + else: + rating_text = " ".join([str(previous_sap_score), previous_epc]) + # Parse the rating text to separate the numeric rating and EPC letter + new_rating = int(rating_text.split()[0]) + new_epc = rating_text.split()[1] + + # Append the information as a dictionary to the recommendations list + recommendations.append({ + "step": step_num, + "measure": measure_title, + "new_rating": new_rating, + "new_epc": new_epc, + "sap_points": new_rating - previous_sap_score + }) + previous_sap_score = new_rating + previous_epc = new_epc + + # Search for the assessment informaton + assessment_information = soup.find('div', {'id': 'information'}) + # Parse this information + rows = assessment_information.find_all('div', 
class_='govuk-summary-list__row') + # Create a dictionary to hold the parsed information + assessment_data = {} + for row in rows: + key = row.find('dt').text.strip() + if key == "Type of assessment": + # We dont reliably extract this + continue + value_tag = row.find('dd') + + # Check if value contains a link (email) + if value_tag.find('a'): + value = value_tag.find('a').text.strip() + elif value_tag.find('summary'): + value = value_tag.find('span').text.strip() + else: + value = value_tag.text.strip() + + # These are keys that we have for both the surveyor and the acreditation scheme. Firstly, we'll + # get the surveyor's name and email so we make that information clear + if key in ["Telephone", "Email"]: + if "Assessor's " + key not in assessment_data: + assessment_data["Assessor's " + key] = value + else: + assessment_data["Accreditation Scheme's " + key] = value + continue + + assessment_data[key] = value + + expected_keys = [ + 'Assessor’s name', + "Assessor's Telephone", + "Assessor's Email", + 'Assessor’s ID', + 'Accreditation scheme', + 'Assessor’s declaration', + "Accreditation Scheme's Telephone", + "Accreditation Scheme's Email", + 'Date of assessment', + 'Date of certificate' + ] + # Check we have all the expected keys + for key in expected_keys: + if key not in assessment_data: + raise ValueError(f"Missing key: {key}") + + # The wall types of the property + property_features_table = soup.find("tbody", class_="govuk-table__body") + property_features_table = property_features_table.find_all("tr") + + # Extract wall types + self.walls = [] + for row in property_features_table: + cells = row.find_all("td") + if row.find("th").text.strip() == "Wall": + self.walls.append(cells[0].text.strip()) + + # Finally, we format the recommendations + recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date) + + # 4) Low and zero carbon energy sources + low_carbon_energy_sources = self.extract_low_carbon_sources(soup) + + # 5) Pull 
out the EPC data + epc_data = self.extract_epc_data(soup) + + resulting_data = { + 'epc_certificate': epc_certificate, + 'current_epc_rating': current_rating.split(' ')[-6], + 'current_epc_efficiency': current_sap, + 'potential_epc_rating': potential_rating.split(' ')[-6], + "potential_epc_efficiency": int(potential_rating.split(' ')[-1]), + "heating_text": heating_text, + "hot_water_text": hot_water_text, + "recommendations": recommendations, + "epc_data": epc_data, + **assessment_data, + **low_carbon_energy_sources, + } + + return resulting_data + + def retrieve_all_find_my_epc_data(self, sap_2012_date=None): + """ - For a post code and address, we pull out all the required data from the find my epc website + This is a quick function to retrieve all the data from the find my epc website for a given postcode and address. + Using this to fulfill a short term need to retrieve all history for a property + :param sap_2012_date: + :return: """ postcode_input = self.postcode.replace(" ", "+") @@ -182,6 +332,98 @@ class RetrieveFindMyEpc: address_response = requests.get(chosen_epc, headers=self.HEADERS) address_res = BeautifulSoup(address_response.text, features="html.parser") + # We check the section on "Other cerificates for this property and get the url" + # Find the section for other certificates + other_cert_section = address_res.find('div', id='other_certificates_and_reports') + + # Extract all certificate number rows (anchor tags within a govuk-summary-list) + other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a') + + other_certificates = [] + for link in other_cert_links: + cert_number = link.text.strip() + cert_url = link['href'].strip() + other_certificates.append({ + "certificate_number": cert_number, + "certificate_url": f"https://find-energy-certificate.service.gov.uk{cert_url}" + }) + + # Always include the currently selected EPC first + soup_list = [address_res] + + # Add additional historic certificates + for link in 
other_cert_links: + cert_url = f"https://find-energy-certificate.service.gov.uk{link['href'].strip()}" + response = requests.get(cert_url, headers=self.HEADERS) + time.sleep(0.3) + soup_list.append(BeautifulSoup(response.text, features="html.parser")) + + all_find_my_epc_data = [] + certificate_ids = [epc_certificate] + [c["certificate_url"].split("/")[-1] for c in other_certificates] # primary first, then historic, in soup_list order + for soup, certificate_id in zip(soup_list, certificate_ids): + all_find_my_epc_data.append(self._extract_epc_from_soup(soup, certificate_id, sap_2012_date)) + + return all_find_my_epc_data + + def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): + """ + For a post code and address, we pull out all the required data from the find my epc website + """ + + postcode_input = self.postcode.replace(" ", "+") + postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) + postcode_response = requests.get(postcode_search, headers=self.HEADERS) + + postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") + rows = postcode_res.find_all('tr', class_='govuk-table__row') + + extracted_table = [] + for row in rows: + # Extract the address and URL + address_tag = row.find('a', class_='govuk-link') + if address_tag is None: + continue + extracted_address = None + extracted_address_url = None + if address_tag: + extracted_address = address_tag.text.strip() + extracted_address_url = address_tag['href'] + + extracted_address_cleaned = ( + extracted_address.replace(",", "").replace(" ", "").lower() + ) + if not extracted_address_cleaned.startswith(self.address_cleaned): + continue + + # If the address is a match, we can extract the data + + # Extract the expiry date + expiry_date_tag = row.find('td', class_='govuk-table__cell date') + expiry_date = None + if expiry_date_tag is not None: + expiry_date = expiry_date_tag.parent.find('span').text.strip() + + extracted_table.append( + { + "extracted_address": extracted_address, + "extracted_address_url": extracted_address_url, + "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'), + } + ) + + if not 
extracted_table: + raise ValueError("No EPC found") + + if len(extracted_table) > 1: + # We take the one with the most recent expiry date + extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) + + chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] + epc_certificate = chosen_epc.split('/')[-1] + + address_response = requests.get(chosen_epc, headers=self.HEADERS) + address_res = BeautifulSoup(address_response.text, features="html.parser") + # Key data we want to retrieve: # 1) Rating # 2) Bills estimates @@ -195,9 +437,6 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) - # Floor area - address_res.find() - # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) bills_list = bills.find_all('li') @@ -432,6 +671,13 @@ class RetrieveFindMyEpc: "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"], "Heating controls (programmer and thermostatic radiator valves)": [ "roomstat_programmer_trvs", "time_temperature_zone_control" + ], + 'Heating controls (programmer room thermostat and thermostatic radiator valves)': [ + "roomstat_programmer_trvs", "time_temperature_zone_control" + ], + "Internal wall insulation": ["internal_wall_insulation"], + "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [ + "high_heat_retention_storage_heater" ] } @@ -466,8 +712,13 @@ class RetrieveFindMyEpc: find_epc_data = searcher.retrieve_newest_find_my_epc_data() except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") + if epc["address1"] == epc["address"]: + # There's no benefit of using the same address, so we split on comma + address1 = epc["address"].split(",")[0] + else: + address1 = epc["address1"] # We attempt with the backup add - searcher = cls(address=epc["address1"], postcode=epc["postcode"]) + searcher = cls(address=address1, 
postcode=epc["postcode"]) find_epc_data = searcher.retrieve_newest_find_my_epc_data() non_invasive_recommendations = { diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 96eb5d0e..0ef37add 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -194,7 +194,7 @@ class Costs: IWI_CONTINGENCY = 0.2 # For air source heat pumps, we inflate the assume cost by quite a bit to account for design and installation - ASHP_CONTINGENCY = 0.35 + ASHP_CONTINGENCY = 0.25 # Where there is more uncertainty, a higher contingency rate is used HIGH_RISK_CONTINGENCY = 0.2 # When there is less uncertainty, a lower contingency rate is used @@ -871,10 +871,10 @@ class Costs: if needs_cylinder: # 1000 is the cost of a new hot water cylinder - total_cost = 1200 * number_heated_rooms + 1000 + total_cost = 1300 * number_heated_rooms + 1000 else: # 500 is the cost of a dual immersion heater - a rough estimate - total_cost = 1200 * number_heated_rooms + 500 + total_cost = 1300 * number_heated_rooms + 500 subtotal_before_vat = total_cost / (1 + self.VAT_RATE) vat = total_cost - subtotal_before_vat diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 18e1110b..9d1a094e 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -57,6 +57,31 @@ class HeatingRecommender: }, # These are the heating types we need to produce a dual heating recommendation "dual": None + }, + 'Electric underfloor heating, electric storage heaters': { + # For this, we would recommend a heat pump + "dual": None + }, + "Room heaters, electric, boiler and radiators, mains gas": { + "hhr": { + "mainheating_description": "Electric storage heaters, radiators", + "recommendation_description": "Install high heat retention electric storage heaters.", + "controls_prefix": "" + }, + "boiler": { + "mainheating_description": "Boiler and radiators, mains gas", + "recommendation_description": "Upgrade to a new condensing 
boiler.", + "controls_suffix": "" + }, + "dual": None + }, + "Room heaters, electric, electric storage heaters": { + "hhr": { + "mainheating_description": "Electric storage heaters, radiators", + "recommendation_description": "Install high heat retention electric storage heaters.", + "controls_prefix": "" + }, + "dual": None } } @@ -109,6 +134,10 @@ class HeatingRecommender: hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters + hhr_suitable = hhr_suitable and ( + "underfloor heating" not in self.property.main_heating["clean_description"] + ) + return ( hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and ("high_heat_retention_storage_heater" in measures) @@ -165,7 +194,8 @@ class HeatingRecommender: ) and (not ashp_only_heating_recommendation) and ("boiler_upgrade" in measures) and - (not self.has_ashp) + (not self.has_ashp) and + (not self.property.main_heating["has_warm_air"]) ) return is_valid, has_gas_boiler @@ -487,17 +517,30 @@ class HeatingRecommender: ] # This is a map from the heating controls description to the description of the air source heat pump set up - ashp_descriptions = { - "Time and temperature zone control": ( - f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, " - "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 or " - "24 hour tariff" - ), - "Programmer, TRVs and bypass": ( - f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure you " - "have an 18 or 24 hour tariff" - ), - } + if ashp_size is None: + ashp_descriptions = { + "Time and temperature zone control": ( + f"Install two cascaded air source heat pumps, and upgrade heating controls to Smart Thermostats, " + "room sensors and smart radiator valves (time & temperature zone control). 
Ensure you have an 18 " + "or " + "24 hour tariff" + ) + } + else: + + ashp_descriptions = { + "Time and temperature zone control": ( + f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, " + "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 " + "or " + "24 hour tariff" + ), + "Programmer, TRVs and bypass": ( + f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure " + f"you " + "have an 18 or 24 hour tariff" + ), + } new_heating_description = "Air source heat pump, radiators, electric" new_hot_water_description = "From main system" @@ -924,6 +967,7 @@ class HeatingRecommender: return recommendations self.heating_recommendations.extend(recommendations) + return None @staticmethod def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms): diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 0e73cffe..462d43aa 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -679,7 +679,7 @@ class Recommendations: # Handle the case of community schemes if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"): - if main_fuel_description == "mains gas (community)": + if main_fuel_description in ["mains gas (community)", "UNKNOWN"]: return { "heating_fuel_type": "Natural Gas (Community Scheme)", "hotwater_fuel_type": "Natural Gas (Community Scheme)", diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index fa8b831c..31ac2433 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -191,11 +191,22 @@ class RoofRecommendations: non_invasive_recommendations = self.property.non_invasive_recommendations + # We check a specific condition - which will imply loft insulation isn't appropriate but room in roof + # insulation 
is + # 1) We have an uninsulated loft (assumed) + # 2) We have a non-intrusive recommendation for room in roof insulation + + rir_over_loft = ( + self.property.roof["is_pitched"] and + self.property.roof["insulation_thickness"] == "none" and + "room_in_roof_insulation" in [x["type"] for x in non_invasive_recommendations] + ) + # We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or ( self.property.roof["is_pitched"] and "loft_insulation" in measures and not self.property.roof["is_at_rafters"] - ): + ) and not rir_over_loft: self.recommend_roof_insulation( u_value=u_value, insulation_thickness=self.insulation_thickness, @@ -223,7 +234,8 @@ class RoofRecommendations: # There are cases where the property might have a room roof as the second roof, but we have a recommendation for # it, so we allow this override if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or ( - "room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] + "room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] or + rir_over_loft ): self.recommend_room_roof_insulation(u_value, phase, default_u_values) return @@ -502,7 +514,7 @@ class RoofRecommendations: # and the cost of the materials rir_non_invasive_recommendation = next( - (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {} + (x for x in self.property.non_invasive_recommendations if x["type"] == "room_in_roof_insulation"), {} ) insulation_materials = pd.DataFrame(self.room_roof_insulation_materials) diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index a82e4df5..05113acf 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -10,11 +10,6 @@ class 
VentilationRecommendations(Definitions): crucial for prevent overheating risks in warmer months """ - VENTILATION_DESCRIPTIONS = [ - 'mechanical, extract only', - 'mechanical, supply and extract' - ] - def __init__( self, property_instance: Property, @@ -26,9 +21,6 @@ class VentilationRecommendations(Definitions): self.recommendation = None self.materials = [part for part in materials if part["type"] == "mechanical_ventilation"] - def identify_ventilation(self): - self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS - def recommend(self, phase): """ If there is no ventilation, we recommend installing ventilation @@ -38,8 +30,8 @@ class VentilationRecommendations(Definitions): :return: """ - self.identify_ventilation() - if self.has_ventilaion: + self.property.identify_ventilation() + if self.property.has_ventilation: return if len(self.materials) != 1: diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 05b9ec42..6909a3f0 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -47,19 +47,19 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation): # We bundle the impact of ventilation with the measure total = ( rec["total"] + ventilation_recommendation["total"] - if rec["type"] in assumptions.measures_needing_ventilation + if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation else rec["total"] ) gain = ( rec[goal_key] + ventilation_recommendation[goal_key] - if rec["type"] in assumptions.measures_needing_ventilation + if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation else rec[goal_key] ) rec_type = ( "+".join( [rec["type"], ventilation_recommendation["type"]] - ) if rec["type"] in assumptions.measures_needing_ventilation + ) if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation else 
rec["type"] ) diff --git a/sfr/principal_pitch/0_prepare_sample.py b/sfr/principal_pitch/0_prepare_sample.py new file mode 100644 index 00000000..8150d519 --- /dev/null +++ b/sfr/principal_pitch/0_prepare_sample.py @@ -0,0 +1,71 @@ +""" +This is a script for preparing a sample for testing the end to end process, so that when Spring send us +data, we know it will work. +""" + +import pandas as pd +from utils.s3 import read_csv_from_s3 + +birmingham_epcs = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv" +) + +# We get the newest EPC, by UPRN and LODGEMENT_DATE +birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE']) + +birmingham_epcs = birmingham_epcs.sort_values( + by=['UPRN', 'LODGEMENT_DATE'], + ascending=[True, False] +).drop_duplicates(subset='UPRN') + +birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0] + +addressable_market = birmingham_epcs[ + (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])) & + (birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01') & + (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) & + (birmingham_epcs['TENURE'].isin( + ['rental (private)', 'Rented (private)'] + )) + ] + +# We take the Spring portfolio and remove the properties in their sample +asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') +asset_list = pd.DataFrame(asset_list) +asset_list["postal_region"] = asset_list["postcode"].str.split(" ").str[0] + +addressable_market = addressable_market[ + ~addressable_market["UPRN"].astype(int).astype(str).isin(asset_list["uprn"].values) +] +addressable_market = addressable_market[ + addressable_market["postal_region"].isin(asset_list["postal_region"].unique()) +] + +# Take a sample of properties, EPC F or G, EPC lodged in 2025. 
We focus on houses/bingalows +sample = birmingham_epcs[ + (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])) & + (birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01') & + (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) + ] + +# Prepare the sample, with just the columns we would expect to receive from Spring +# 1) UPRN +# 2) Address +# 3) Postcode +# 4) Property type +# 5) Built form +# 6) Number of bedrooms (we'll simulate this) +# 7) Number of bathrooms (we'll simulate this) +# 8) Valuation (We'll simulate this, around 200,000) + +sample = sample[['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM']].copy() +sample['BEDROOMS'] = 3 # Simulating number of bedrooms +sample['BATHROOMS'] = 1 # Simulating number of bathrooms +sample['VALUATION'] = 200000 # Simulating valuation +sample.columns = [x.lower() for x in sample.columns] + +# Store this as a excel +sample.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx", + index=False +) diff --git a/sfr/principal_pitch/1_prepare_data.py b/sfr/principal_pitch/1_prepare_data.py new file mode 100644 index 00000000..53969ec9 --- /dev/null +++ b/sfr/principal_pitch/1_prepare_data.py @@ -0,0 +1,124 @@ +""" +This script prepares the data for the principal pitch modelling +""" +import os +import pandas as pd +from dotenv import load_dotenv +from utils.s3 import save_csv_to_s3 +from etl.find_my_epc.AssetListEpcData import AssetListEpcData + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") +PORTFOLIO_ID = 206 +USER_ID = 8 +EPC_TARGET = "C" + +# Read the input file + +properties = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Birmingham_price_top300.xlsx" +) +# Keep just the D's and below +properties = properties[properties["current_energy_rating"].isin(["D", "E", "F", "G"])].copy() +# Focus on houses +properties = properties[properties["property_type_std"] != "Flat"] +properties = 
properties[properties["property_type"] != "flat"] + +# Rename the key columns +properties = properties.rename( + columns={ + "address1": "address", + "number_of_bathrooms": "n_bathrooms", + "num_beds": "n_bedrooms" + } +) +properties["patch"] = True + +# Pull the non-invasive recommendations +asset_list_epc_client = AssetListEpcData( + asset_list=properties, + epc_auth_token=EPC_AUTH_TOKEN +) +asset_list_epc_client.get_data() +asset_list_epc_client.get_non_invasive_recommendations() +asset_list_epc_client.get_patch() + +extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data) +epc_df = pd.DataFrame(asset_list_epc_client.epc_data) + +# Find examples where patches are different to the api +compare_epc = [] +for patch in asset_list_epc_client.patches: + extracted = extracted_df[extracted_df["uprn"] == patch["uprn"]].squeeze() + epc = epc_df[epc_df["uprn"] == patch["uprn"]].squeeze() + compare_epc.append( + { + "uprn": extracted["uprn"], + "address": extracted["address"], + "postcode": extracted["postcode"], + "api_epc": int(extracted["current_epc_efficiency"]), + "fme_epc": int(epc["current-energy-efficiency"]), + } + ) +compare_epc = pd.DataFrame(compare_epc) +diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]] +# Compare matched addresses to make sure they are the same +compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge( + epc_df[["uprn", "address1", "postcode"]].rename(columns={"address1": "epc_address1", "postcode": "epc_postcode"}), + how="left", + on=["uprn"] +) + +# Add on uprn +properties = properties.merge( + extracted_df[["address", "postcode", "uprn"]], + how="left", + on=["address", "postcode"] +) + +# Store the asset list in s3 +filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" +save_csv_to_s3( + dataframe=properties, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename +) + +# Store non-invasive recommendations in S3 +non_invasive_recommendations_filename = 
f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" +save_csv_to_s3( + dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename +) + +# Store patches in S3 +patches_filename = "" +if asset_list_epc_client.patches: + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv" + save_csv_to_s3( + dataframe=pd.DataFrame(asset_list_epc_client.patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + +body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increasing EPC", + "goal_value": "C", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "valuation_file_path": "", + "scenario_name": "EPC C", + "multi_plan": True, + "budget": None, + "ashp_cop": 3.5, + # This is new - when optimising, we drop scores by a few points to account for SAP 10 + "simulate_sap_10": True, + "exclusions": ["external_wall_insulation"], + "required_measures": ["cavity_wall_insulation", "loft_insulation"] +} +print(body) diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py new file mode 100644 index 00000000..5660b78d --- /dev/null +++ b/sfr/principal_pitch/2_export_data.py @@ -0,0 +1,224 @@ +""" +This script prepares the data for the financial model +""" + +import pandas as pd +from backend.app.utils import sap_to_epc +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel + +PORTFOLIO_ID = 206 +SCENARIOS = [389] + + +def get_data(portfolio_id, scenario_ids): + session = sessionmaker(bind=db_engine)() + session.begin() + + # Get 
properties and their details for a specific portfolio + properties_query = session.query( + PropertyModel, + PropertyDetailsEpcModel + ).join( + PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id + ).filter( + PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID + ).all() + + # Transform properties data to include all fields dynamically + properties_data = [ + {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, + **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in + PropertyDetailsEpcModel.__table__.columns}} + for prop in properties_query + ] + + # Get property IDs from fetched properties + + # Get plans linked to the fetched properties + plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + + # Transform plans data to include all fields dynamically + plans_data = [ + {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + for plan in plans_query + ] + + # Extract plan IDs for filtering recommendations through PlanRecommendations + plan_ids = [plan['id'] for plan in plans_data] + + # Get recommendations through PlanRecommendations for those plans and that are default + recommendations_query = session.query( + Recommendation, + Plan.scenario_id + ).join( + PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id + ).join( + Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id + ).filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True # Filtering for default recommendations + ).all() + + # Transform recommendations data to include all fields dynamically and include scenario_id + recommendations_data = [ + {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, + col.name) for + col in Recommendation.__table__.columns}, + "Scenario ID": rec.scenario_id} + for rec in 
recommendations_query + ] + + session.close() + + return properties_data, plans_data, recommendations_data + + +properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) + +properties_df = pd.DataFrame(properties_data) +plans_df = pd.DataFrame(plans_data) +recommendations_df = pd.DataFrame(recommendations_data) + +recommended_measures_df = recommendations_df[ + ["property_id", "measure_type", "estimated_cost", "default"] +] +recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] +recommended_measures_df = recommended_measures_df.drop(columns=["default"]) + +post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] +post_install_sap = post_install_sap[post_install_sap["default"]] +# Sum up the sap points by property id +post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() + +recommendations_measures_pivot = recommended_measures_df.pivot( + index='property_id', + columns='measure_type', + values='estimated_cost' +) +recommendations_measures_pivot = recommendations_measures_pivot.reset_index() + +# Total cost is the row sum, excluding the property_id column +recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( + columns=["property_id"] +).sum(axis=1) + +df = properties_df[ + [ + "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", + "current_epc_rating", + "current_sap_points", "total_floor_area", "number_of_rooms", + ] +].merge( + recommendations_measures_pivot, how="left", on="property_id" +).merge( + post_install_sap, how="left", on="property_id" +) + +df = df.drop(columns=["property_id"]) +df["sap_points"] = df["sap_points"].fillna(0) + +df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] +df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round() +df["predicted_post_works_epc"] = 
df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) + +# We merge this back to the main dataframe, which will contain the bathrooms +from utils.s3 import read_csv_from_s3 + +asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') +asset_list = pd.DataFrame(asset_list) +df["uprn"] = df["uprn"].astype(str) +asset_list = asset_list.merge( + df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), + how="left", + on="uprn" +) + +condition_costs = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx", + sheet_name="Prices - Khalim", + header=35 +) +# Remove unnamed columns and reset index +condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')] +condition_costs = condition_costs.reset_index(drop=True) + + +# We now estimate condition cost +def simulate_condition(asset_list, condition_costs): + """ + This function is for testing, and will simulate condition cost from 1-10 for each property to see what the + costing array looks like. 
+ :param asset_list: properties to cost against each condition (1-10) in condition_costs + :return: asset_list with one 'Condition N' total-cost column per condition + """ + + condition_df = [] + for _, row in asset_list.iterrows(): + + n_bathrooms = row["bathrooms"] + + conditions = {} + for condition in reversed(range(1, 11)): + condition_cost = condition_costs[ + condition_costs["Condition"] == condition + ].drop(columns=["Condition"]).iloc[0] + + # Each cost is scaled by floor area + condition_cost = condition_cost * row["total_floor_area"] + condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms + + total_condition_cost = condition_cost.sum() + conditions["Condition " + str(condition)] = (total_condition_cost) + + condition_df.append( + { + "uprn": row["uprn"], + **conditions + } + ) + + condition_df = pd.DataFrame(condition_df) + + asset_list = asset_list.merge( + condition_df, + how="left", + on="uprn" + ) + + return asset_list + + +# asset_list = simulate_condition(asset_list, condition_costs) + +# We calculate the condition cost based on the condition +for _, row in asset_list.iterrows(): + + condition = row["condition_score"] + if condition in [None, ""]: + continue + condition = int(float(condition)) + + condition_cost = condition_costs[ + condition_costs["Condition"] == condition + ].drop(columns=["Condition"]).iloc[0] + + # Each cost is scaled by floor area + condition_cost = condition_cost * float(row["total_floor_area"]) + n_bathrooms = row["n_bathrooms"] + condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms) + + total_condition_cost = condition_cost.sum() + asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost + +# Store output +asset_list.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx", + index=False +) + +condition_cost_comparison = asset_list[ + ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"] +]