diff --git a/.idea/Model.iml b/.idea/Model.iml
index 09f2e496..c6561970 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index fb10c6b0..50cad4ca 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index ad3087c3..21376708 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -301,6 +301,14 @@ class AssetList:
"Potential unsafe environment", "Date of Inspection", "Borescoped?"
]
+ # Another version of non-intrusives:
+ NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [
+ 'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?',
+ 'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED',
+ 'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE',
+ 'NAME OF SURVEYOR'
+ ]
+
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@@ -442,6 +450,8 @@ class AssetList:
self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns
# We detect if we have the old format of non-intruvies
self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns
+ if self.old_format_non_intrusives_present:
+ self.non_intrusives_present = False
self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
@@ -449,6 +459,8 @@ class AssetList:
"Has the property been re-walled?" in self.raw_asset_list.columns
)
+ self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
+
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@@ -750,7 +762,7 @@ class AssetList:
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
non_intrusive_columns = []
- if self.non_intrusives_present:
+ if self.non_intrusives_present and not self.new_format_non_insturives_present_v2:
non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES
if self.non_intrusives_eligibility:
@@ -759,6 +771,9 @@ class AssetList:
if self.new_format_non_insturives_present:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
+ if self.new_format_non_insturives_present_v2:
+ non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
+
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@@ -827,52 +842,44 @@ class AssetList:
# We attempt to convert the year built to a datetime, by detecting the format and converting
def extract_year(date_str):
- """
- Extracts the year from a date string in the format '01-Jul-YYYY'.
- Returns the extracted year as an integer or None if the format is incorrect.
- """
- known_errors = [
+ """
+ Extract the year a property was built from a raw year-built value.
+
+ Returns None for null values and known error markers, the ``year`` of a datetime, a numeric
+ value between 1000 and 2100 as an int, the year of a 'DD-Mon-YYYY' string, the most recent
+ year matched in any other string, or a bare four-digit number once non-digits are stripped.
+ Raises NotImplementedError for any value that fits none of these.
+ """
+ known_errors = {
"#MULTIVALUE",
+ "ND",
+ "PIMSS EMPTY",
+ "UNKNOWN",
"This cell has an external reference that can't be shown or edited. Editing this cell will "
"remove the external reference.",
- "ND",
- 'PIMSS EMPTY',
- "UNKNOWN"
- ]
+ 0
+ }
- if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
+ if pd.isnull(date_str) or date_str in known_errors:
return None
- if isinstance(date_str, str):
- match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str)
- if match:
- return int(match.group(1)) # Extract the year and convert to integer
- if "-" in date_str:
-
- # Count the number of times we have "-", as we've seen double ranges
- # (when we have extensions) so the format is like this:
- # 'G: 1983-1990, H: 1991-1995'
- if date_str.count("-") == 2:
- # We have a range
- return int(date_str.split("-")[1].split(",")[0])
- # We probably have a range
- return int(date_str.split("-")[1].strip())
-
+ # Handle datetime
if isinstance(date_str, datetime):
return date_str.year
- if isinstance(date_str, float):
- if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4):
+ # Handle numeric year (float or int)
+ if isinstance(date_str, (int, float)):
+ if 1000 <= int(date_str) <= 2100:
return int(date_str)
- # Check if date_str is a year itself
- if str(date_str).isdigit() & (len(str(date_str)) == 4):
- return int(date_str)
+ # Now handle string-based logic
+ if isinstance(date_str, str):
+ # Direct date match e.g. 01-Jul-2021
+ match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str)
+ if match:
+ return int(match.group(1))
- # Remove any non-numeric characters
- date_str = re.sub(r"\D", "", str(date_str))
- if str(date_str).isdigit() & (len(str(date_str)) == 4):
- return int(date_str)
+ # Find all 4-digit years in the string. We accept 1000-2099 so that pre-1900 ranges such as
+ # "1875-1890" are handled, in line with the 1000-2100 bounds used for numeric values above
+ years = [int(y) for y in re.findall(r"\b(?:1\d|20)\d{2}\b", date_str)]
+ if years:
+     return max(years)  # Return most recent year
+
+ # If only numbers are present without format
+ numeric_str = re.sub(r"\D", "", date_str)
+ if len(numeric_str) == 4 and numeric_str.isdigit():
+ return int(numeric_str)
raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me")
@@ -1104,7 +1111,7 @@ class AssetList:
num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
floor_height=(
float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if
- x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5
+ # Fall back to 2.5 for missing values (NaN/None) and for falsy ones (0, ""), which the previous check covered
+ not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]])
+ and x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5
),
perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER],
built_form=x[self.EPC_API_DATA_NAMES["built-form"]]
@@ -1315,10 +1322,16 @@ class AssetList:
# Before we being, we identify if a property has solar already as we use this
# for identifying cavity jobs
- if self.non_intrusives_present:
- existing_solar_non_intrusives_check = (
- self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
- )
+ if self.non_intrusives_present and not self.old_format_non_intrusives_present:
+
+ if self.new_format_non_insturives_present_v2:
+ existing_solar_non_intrusives_check = (
+ self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV"
+ )
+ else:
+ existing_solar_non_intrusives_check = (
+ self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
+ )
elif self.old_format_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
@@ -1557,7 +1570,7 @@ class AssetList:
) & (
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
["district heating", "communal heating", "communal gas boiler"]
- ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ")
+ ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ")
)
)
@@ -1596,12 +1609,17 @@ class AssetList:
# With this in mind, we look for 2 clases
# 1) The property is fully insulated apart from the loft (<200mm insulation)
# 2) THe property is fully insulated
-
- print("Should we include cavity properties where they might be uninsulated?")
self.standardised_asset_list["solar_landlord_walls_insulated"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
[
- "filled cavity", "insulated solid brick", "insulated timber frame",
+ "filled cavity",
+ "insulated solid brick",
+ "insulated timber frame",
+ "uninsulated cavity",
+ "insulated system built",
+ "insulated granite or whinstone",
+ "insulated sandstone or limestone",
+ "new build - average thermal transmittance"
]
)
)
@@ -1999,24 +2017,6 @@ class AssetList:
self.standardised_asset_list[col]
)
- blocks_of_flats = self.standardised_asset_list[
- self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
- ]
-
- non_blocks_of_flats = self.standardised_asset_list[
- self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
- ]
-
- # Produce some aggregate figures
- self.work_type_figures = {
- **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(),
- **{
- k + " (Block of flats)": v for k, v in
- blocks_of_flats["solar_reason"].value_counts().to_dict().items()
- },
- **self.standardised_asset_list["solar_reason"].value_counts().to_dict()
- }
-
# We prepare outcomes for output
if self.outcomes is not None:
logger.info("Preparing outcomes for output")
@@ -2047,6 +2047,26 @@ class AssetList:
)
)
+ def get_work_figures(self):
+     """
+     Build, store, print and return aggregate counts of the cavity and solar reasons.
+
+     The counts are merged into one dict, stored on self.work_type_figures:
+       - "cavity_reason" value counts for rows that are not a "block of flats"
+       - "solar_reason" value counts for "block of flats" rows, keyed with a " (Block of flats)" suffix
+       - "solar_reason" value counts across the whole standardised asset list
+     Later entries overwrite earlier ones where the keys collide.
+
+     :return: the dict that was stored on self.work_type_figures
+     """
+     # Imported locally so the method does not depend on the module-level imports
+     from pprint import pprint
+
+     blocks_of_flats = self.standardised_asset_list[
+         self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
+     ]
+
+     non_blocks_of_flats = self.standardised_asset_list[
+         self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
+     ]
+
+     # Produce some aggregate figures
+     self.work_type_figures = {
+         **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(),
+         **{
+             k + " (Block of flats)": v for k, v in
+             blocks_of_flats["solar_reason"].value_counts().to_dict().items()
+         },
+         **self.standardised_asset_list["solar_reason"].value_counts().to_dict()
+     }
+     pprint(self.work_type_figures)
+     return self.work_type_figures
+
def fill_landlord_block_reference(self, has_blocks_of_flats):
if not has_blocks_of_flats:
return
@@ -2082,15 +2102,40 @@ class AssetList:
for _, row in blocks.iterrows():
addr = str(row[self.STANDARD_ADDRESS_1])
+ # Cast to str (as done for addr above) so a missing full address does not break the .lower() calls
+ full_addr = str(row[self.STANDARD_FULL_ADDRESS])
+
+ # We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just
+ # the odds, evens or all of the numbers
+ has_odd = (
+ "(odd)" in addr.lower() or
+ "(odd)" in full_addr.lower() or
+ "(odds)" in addr.lower() or
+ "(odds)" in full_addr.lower()
+ )
+ has_even = (
+ "(even)" in addr.lower() or
+ "(even)" in full_addr.lower() or
+ "(evens)" in addr.lower() or
+ "(evens)" in full_addr.lower()
+ )
# 1 ─ Range (e.g. 1-7)
m_range = RANGE_RE.search(addr)
if m_range:
+
start, end = m_range.groups()
start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
if start > end or (end - start) > 100:
raise ValueError(f"Suspicious range '{addr}'")
- for n in range(start, end + 1):
+
+ # We define the looping range on whether we have odd, even or all numbers
+ house_number_range = range(start, end + 1)
+ if has_odd:
+ house_number_range = [x for x in house_number_range if x % 2 != 0]
+ if has_even:
+ house_number_range = [x for x in house_number_range if x % 2 == 0]
+
+ for n in house_number_range:
new = row.copy()
new_addr = RANGE_RE.sub(str(n), addr, count=1)
original_full_address = new[self.STANDARD_FULL_ADDRESS]
@@ -2108,9 +2153,9 @@ class AssetList:
expanded_rows.append(new)
continue
- # 2 ─ Explicit list (e.g. 1, 2, 5 Block)
+ # 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block)
nums = NUM_RE.findall(addr)
- if len(nums) > 1 and ',' in addr:
+ if len(nums) > 1 and (',' in addr or '&' in addr):
for n in nums:
new = row.copy()
new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only
@@ -2320,7 +2365,7 @@ class AssetList:
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
self.standardised_asset_list["cavity_reason"]
- + " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
+ + " " + "(Flat in block with more than 50% eligible)",
self.standardised_asset_list["cavity_reason"]
)
@@ -2375,6 +2420,11 @@ class AssetList:
none_details = [x for x in details_colnames if x is None]
details_colnames = [x for x in details_colnames if x is not None]
+ if local_filepath is None:
+ # Create an empty DataFrame based on the fields in self.contact_detail_fields
+ self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys()))
+ return
+
contact_details = pd.read_excel(
local_filepath, sheet_name=sheet_name
)[[self.contact_detail_fields["landlord_property_id"]] + details_colnames]
@@ -2486,10 +2536,14 @@ class AssetList:
if reconcile_programme:
programme_data = programme_data[~pd.isnull(programme_data["project_code"])]
else:
+
+ if programme_data["hubspot_status"].nunique() > 1:
+ logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?")
+
ready_to_be_scheduled = (
(
programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
- ) & (~pd.isnull(programme_data["survey_date"]))
+ )
)
# completed_works = (
# (programme_data["hubspot_status"] !=
@@ -2540,13 +2594,13 @@ class AssetList:
)
else:
# We shouldn't have any missing products
- programme_data = programme_data[
- ~pd.isnull(programme_data["survey_date"])
- ]
+ # programme_data = programme_data[
+ # ~pd.isnull(programme_data["survey_date"])
+ # ]
if pd.isnull(programme_data["domna_product"]).sum():
raise ValueError("Missing products")
- programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
+ programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
product_df = (
pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
@@ -2587,6 +2641,13 @@ class AssetList:
programme_data[self.EPC_API_DATA_NAMES["uprn"]]
)
+ # Remove any negative UPRNs, which are not valid, by blanking the UPRN on rows flagged as estimated
+ programme_data[uprn_column] = np.where(
+ programme_data["estimated"].isin([1, True]),
+ None,
+ programme_data[uprn_column]
+ )
+
# Add in some columns if we have them
date_of_inspections = (
"Non-Intrusives: Date of Inspection" if
@@ -2753,6 +2814,7 @@ class AssetList:
columns={v: k for k, v in schema_mappings.items() if v is not None}
)
+ programme_data['Postcode '] = programme_data['Postcode '].copy()
programme_data['Installer '] = installer_name
programme_data['Name '] = (
programme_data['Full Address '] + " ," + programme_data['Postcode ']
@@ -2951,7 +3013,7 @@ class AssetList:
outcomes["row_id"] = outcomes.index
if outcomes_houseno[idx] is None:
- outcomes_houseno = "houseno"
+ outcomes_houseno[idx] = "houseno"
outcomes["houseno"] = outcomes[outcomes_address[idx]].apply(
lambda x: SearchEpc.get_house_number(x, outcomes[outcomes_postcode])
)
@@ -3219,12 +3281,21 @@ class AssetList:
install_col = "INSTALL / CANCELLATION DATE"
elif 'INSTALL/ CANCELLATION DATE' in master_data.columns:
install_col = 'INSTALL/ CANCELLATION DATE'
+ elif "INSTALL/CANCELLATION DATE" in master_data.columns:
+ install_col = "INSTALL/CANCELLATION DATE"
+ elif 'Measure 1 Install Date' in master_data.columns:
+ install_col = 'Measure 1 Install Date'
else:
raise ValueError("No install or cancellation date")
- submission_col = (
- "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
- )
+ if "SUBMISSION DATE" in master_data.columns:
+ submission_col = "SUBMISSION DATE"
+ elif "SUBMISSION DATE TO INSTALLERS" in master_data.columns:
+ submission_col = "SUBMISSION DATE TO INSTALLERS"
+ elif "Submission Date" in master_data.columns:
+ submission_col = "Submission Date"
+ else:
+ raise ValueError("No submission date column found in master data")
master_data["row_id"] = master_data.index
@@ -3239,6 +3310,10 @@ class AssetList:
scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION"
elif "AFFORDABLE WARMTH" in master_data.columns:
scheme_col = "AFFORDABLE WARMTH"
+ elif "Scheme" in master_data.columns:
+ scheme_col = "Scheme"
+ elif "Affordable Warmth" in master_data.columns:
+ scheme_col = "Affordable Warmth"
else:
scheme_col = "OFFICE USE ONLY"
@@ -3254,12 +3329,30 @@ class AssetList:
property_type_col = "PROPERTY TYPE As per table emailed"
elif "PROPERTY TYPE As per table emailed" in master_data.columns:
property_type_col = "PROPERTY TYPE As per table emailed"
+ elif "PROPERTY TYPE" in master_data.columns:
+ property_type_col = "PROPERTY TYPE"
else:
property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)"
+ if "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" in master_data.columns:
+ installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
+ elif "INSTALLERS NOTES" in master_data.columns:
+ installer_notes_col = "INSTALLERS NOTES"
+ elif 'Installers Notes' in master_data.columns:
+ installer_notes_col = 'Installers Notes'
+ elif 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' in master_data.columns:
+ installer_notes_col = 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM'
+ else:
+ raise ValueError("No installer notes column found in master data")
+
+ if "INSTALLER" in master_data.columns:
+ installer_col = "INSTALLER"
+ elif "Installer" in master_data.columns:
+ installer_col = "Installer"
+ else:
+ raise ValueError("No installer column found in master data")
+
measure_mix_col = "MEASURE COMBO"
- installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
- installer_col = "INSTALLER"
town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area'
logger.info("Matching master data to asset list")
@@ -3301,6 +3394,10 @@ class AssetList:
]
house_no = row[house_no_col]
+
+ if pd.isnull(house_no):
+ house_no = None
+
if isinstance(house_no, (float, int)):
house_no = str(int(house_no))
@@ -3401,6 +3498,9 @@ class AssetList:
master_data[measure_mix_col] = "Measure mix not recorded"
matched = pd.DataFrame(matched)
+ if matched.empty:
+ continue
+
master_to_append = master_data[
[scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col]
].merge(
diff --git a/asset_list/abs_estimates.py b/asset_list/abs_estimates.py
new file mode 100644
index 00000000..58adcca6
--- /dev/null
+++ b/asset_list/abs_estimates.py
@@ -0,0 +1,229 @@
+"""
+Simple script to take a standardised asset list and calculate the abs. We'll use this code to estimate
+the ABS for properties, going forward
+"""
+import os
+import pandas as pd
+import numpy as np
+from dotenv import load_dotenv
+from etl.find_my_epc.AssetListEpcData import AssetListEpcData
+from backend.Funding import Funding
+from backend.app.utils import sap_to_epc
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx",
+ sheet_name="Cavity Route (Insta Review)"
+)
+
+abs_matrix = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
+)
+pps_matrix = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx",
+ header=1
+)
+pps_matrix.columns = [c.strip() for c in pps_matrix.columns]
+
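+# We need to estimate the number of points the work will produce and the finishing band. For this, we assume 7 for
+# cavity and 15 for solar. We'll be more specific in the future, but for now, this is a good enough estimate.
+# NOTE(review): no 7 or 15 is applied below - the points come from the non-invasive recommendations, filled with
+# the postcode mean and then the overall median. Confirm which approach is intended.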
+route = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
+ columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "upr"}
+)
+route["address"] = route["address"].astype(str)
+
+asset_list_epc_client = AssetListEpcData(
+ asset_list=route,
+ epc_auth_token=EPC_AUTH_TOKEN
+)
+
+asset_list_epc_client.get_data()
+asset_list_epc_client.get_non_invasive_recommendations()
+
+solar_sap_points = []
+for r in asset_list_epc_client.non_invasive_recommendations:
+ if not r.get("recommendations"):
+ continue
+ solar_recommendations = [
+ x for x in r["recommendations"] if "solar_pv" in x["type"]
+ ]
+ if solar_recommendations:
+ solar_recommendations = solar_recommendations[0]
+ else:
+ continue
+
+ address = r["address"]
+ postcode = r["postcode"]
+
+ solar_sap_points.append(
+ {
+ "address": address,
+ "postcode": postcode,
+ "sap_points": solar_recommendations["sap_points"]
+ }
+ )
+
+solar_sap_points = pd.DataFrame(solar_sap_points)
+solar_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
+# Store the sap points in the cavity route to csv
+# cwi_sap_points.to_csv(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv",
+# index=False
+# )
+
+avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
+avg_solar_points = solar_sap_points["sap_points"].median()
+asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
+asset_list = asset_list.merge(
+ solar_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
+).drop(
+ columns=["address", "postcode"]
+)
+
+# Fill the missing sap points with the average solar points for the postcode
+asset_list = asset_list.merge(
+ avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
+ how="left", on=["domna_postcode"], suffixes=("", "_avg")
+)
+asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
+asset_list.drop(columns=["sap_points_avg"], inplace=True)
+
+asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_solar_points)
+asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
+asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))
+asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
+asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))
+
+asset_list["ending_half_band"] = np.where(
+ (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]),
+ "Low_C",
+ asset_list["ending_half_band"]
+)
+# Realistically, we'll take the properties to a low C at worst
+asset_list["ending_half_band"] = np.where(
+ (asset_list["post_works_sap"] < 69),
+ "Low_C",
+ asset_list["ending_half_band"]
+)
+
+asset_list = asset_list.merge(
+ abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
+ right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
+)
+asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
+
+asset_list = asset_list.rename(
+ columns={"Cost Savings": "funding_abs"}
+)
+
+print(asset_list["domna_property_id"].duplicated().sum())
+
+# Store this data
+asset_list.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv",
+ index=False
+)
+
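+# Cavity process!
+# NOTE(review): cwi_sap_points is only assigned in the commented-out lines below, so the statements after them
+# raise NameError as written - restore the loop or the read_csv before running this section.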
+# cwi_sap_points = []
+# for r in asset_list_epc_client.non_invasive_recommendations:
+# if not r.get("recommendations"):
+# continue
+# cwi_recommendations = [
+# x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"]
+# ]
+# if cwi_recommendations:
+# cwi_recommendations = cwi_recommendations[0]
+# else:
+# continue
+#
+# address = r["address"]
+# postcode = r["postcode"]
+#
+# cwi_sap_points.append(
+# {
+# "address": address,
+# "postcode": postcode,
+# "sap_points": cwi_recommendations["sap_points"]
+# }
+# )
+#
+# cwi_sap_points = pd.DataFrame(cwi_sap_points)
+# cwi_sap_points = pd.read_csv(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
+# )
+# cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
+avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
+avg_cwi_points = cwi_sap_points["sap_points"].median()
+asset_list = asset_list.merge(
+ cwi_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
+).drop(
+ columns=["address", "postcode"]
+)
+
+# Fill the sap points with the average cwi points
+asset_list = asset_list.merge(
+ avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
+ how="left", on=["domna_postcode"], suffixes=("", "_avg")
+)
+asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
+asset_list.drop(columns=["sap_points_avg"], inplace=True)
+
+asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_cwi_points)
+asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
+asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))
+asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
+asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))
+
+asset_list["funding_scheme"] = np.where(
+ (
+ (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"])
+ ),
+ "GBIS",
+ "ECO4"
+)
+asset_list = asset_list.merge(
+ abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
+ right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
+)
+asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
+
+# Using CWI solid 1.7 -> 0.3 rates
+cwi_pps_matrix = pps_matrix[
+ pps_matrix["Measure_Type"].isin(["CWI_0.033"])
+]
+# Merge on
+asset_list = asset_list.merge(
+ cwi_pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
+ columns={
+ "Cost Savings": "partial_project_score",
+ "Starting Band": "starting_half_band",
+ "Total Floor Area Band": "floor_area_band"
+ }
+ ),
+ how="left",
+ on=["starting_half_band", "floor_area_band"],
+)
+asset_list["partial_project_score"] = np.where(
+ (asset_list["epc_sap_score_on_register"] > 69),
+ None,
+ asset_list["partial_project_score"]
+)
+
+asset_list["funding_abs"] = np.where(
+ asset_list["funding_scheme"] == "GBIS",
+ asset_list["partial_project_score"],
+ asset_list["Cost Savings"]
+)
+
+asset_list["domna_property_id"].duplicated().sum()
+
+# Store this data
+asset_list.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv",
+ index=False
+)
diff --git a/asset_list/app.py b/asset_list/app.py
index 7c0023ce..e431f723 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -1,7 +1,6 @@
import os
import json
import pandas as pd
-from pprint import pprint
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@@ -60,39 +59,370 @@ def app():
Property UPRN
"""
- # NCHA
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
- data_filename = "Energy Information MASTER June 2025.xlsx"
- sheet_name = "Data"
+ # CDS
+ data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS"
+ data_filename = "Founder Estates - Asset List.xlsx"
+ sheet_name = "Combined"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
- landlord_year_built = "Build Date (HAR10)"
+ landlord_year_built = None
landlord_os_uprn = None
- landlord_property_type = "Property Type (HAR10)"
- landlord_built_form = "Build Form (EPC)"
- landlord_wall_construction = "Wall Description"
- landlord_roof_construction = None
- landlord_heating_system = "HEAT Code"
+ landlord_property_type = None
+ landlord_built_form = None
+ landlord_wall_construction = None
+ landlord_heating_system = "Heating Type"
landlord_existing_pv = None
- landlord_property_id = "Place ref"
- landlord_sap = "EPC SAP"
- outcomes_filename = None
- outcomes_sheetname = None
- outcomes_postcode = None
- outcomes_houseno = None
- outcomes_id = None
- outcomes_address = None
+ landlord_property_id = "Row ID"
+ outcomes_filename = []
+ outcomes_sheetname = []
+ outcomes_postcode = []
+ outcomes_houseno = []
+ outcomes_address = []
+ outcomes_id = []
master_filepaths = []
master_to_asset_list_filepath = None
- phase = False
- ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
master_id_colnames = []
+ landlord_roof_construction = None
+ phase = False
+ landlord_sap = None
+ ecosurv_landlords = None
+
+ # Plus Dane
+ data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/"
+ data_filename = "20250711 Plus Dane Asset List.xlsx"
+ sheet_name = "Sheet1"
+ postcode_column = 'Postcode'
+ fulladdress_column = "Address"
+ address1_column = None
+ address1_method = "house_number_extraction"
+ address_cols_to_concat = []
+ missing_postcodes_method = None
+ landlord_year_built = "Property Age"
+ landlord_os_uprn = None
+ landlord_property_type = "Property Type"
+ landlord_built_form = "Built Form"
+ landlord_wall_construction = "Wall Construction"
+ landlord_heating_system = "Full Heating System"
+ landlord_existing_pv = None
+ landlord_property_id = "UPRN"
+ outcomes_filename = [
+ os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"),
+ os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"),
+ os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"),
+ ]
+ outcomes_sheetname = [
+ "CWI & LI - 2024", "2025 - CWI", "PV - 2025",
+ ]
+ outcomes_postcode = ["Postcode", "Postcode", "Postcode"]
+ outcomes_houseno = ["No.", "No", "No"]
+ outcomes_address = ["Address", "Address", "Address"]
+ outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"]
+ master_filepaths = [
+ os.path.join(data_folder, "submissions/JJC-Table 1.csv"),
+ os.path.join(data_folder, "submissions/SCIS-Table 1.csv")
+ ]
+ master_to_asset_list_filepath = None
+ asset_list_header = 1
+ landlord_block_reference = None
+ master_id_colnames = [None, None]
+ landlord_roof_construction = None
+ phase = False
+ landlord_sap = "SAP Rating"
+ ecosurv_landlords = "plus dane"
+
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
+ # data_filename = "20250710 Asset List Brentwood.xlsx"
+ # sheet_name = "Sheet1"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = None
+ # address1_column = "House Number"
+ # address1_method = None
+ # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
+ # missing_postcodes_method = None
+ # landlord_year_built = "Year Built"
+ # landlord_os_uprn = None
+ # landlord_property_type = "Dwelling"
+ # landlord_built_form = None
+ # landlord_wall_construction = None
+ # landlord_heating_system = "Heating"
+ # landlord_existing_pv = None
+ # landlord_property_id = "UPRN"
+ # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
+ # outcomes_sheetname = ["OUTCOMES"]
+ # outcomes_postcode = ["POSTCODE"]
+ # outcomes_houseno = [None]
+ # outcomes_address = ["ADDRESS"]
+ # outcomes_id = [None]
+ # master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
+ # master_to_asset_list_filepath = None
+ # asset_list_header = 1
+ # landlord_block_reference = None
+ # master_id_colnames = [None]
+ # landlord_roof_construction = None
+ # phase = False
+ # landlord_sap = None
+ # ecosurv_landlords = "brentwood"
+
+ # Brentwood
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
+ # data_filename = "20250710 Asset List Brentwood.xlsx"
+ # sheet_name = "Sheet1"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = None
+ # address1_column = "House Number"
+ # address1_method = None
+ # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
+ # missing_postcodes_method = None
+ # landlord_year_built = "Year Built"
+ # landlord_os_uprn = None
+ # landlord_property_type = "Dwelling"
+ # landlord_built_form = None
+ # landlord_wall_construction = None
+ # landlord_heating_system = "Heating"
+ # landlord_existing_pv = None
+ # landlord_property_id = "UPRN"
+ # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
+ # outcomes_sheetname = ["OUTCOMES"]
+ # outcomes_postcode = ["POSTCODE"]
+ # outcomes_houseno = [None]
+ # outcomes_address = ["ADDRESS"]
+ # outcomes_id = [None]
+ # master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
+ # master_to_asset_list_filepath = None
+ # asset_list_header = 1
+ # landlord_block_reference = None
+ # master_id_colnames = [None]
+ # landlord_roof_construction = None
+ # phase = False
+ # landlord_sap = None
+ # ecosurv_landlords = "brentwood"
+ #
+ # # Eastlight
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme"
+ # data_filename = "INSPECTIONS MASTER Non Tech.xlsx"
+ # sheet_name = "EASTLIGHT CW"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = None
+ # address1_column = "HouseName"
+ # address1_method = None
+ # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"]
+ # missing_postcodes_method = None
+ # landlord_year_built = "Built In Year"
+ # landlord_os_uprn = None
+ # landlord_property_type = "AssetType"
+ # landlord_built_form = "Archetype" # Using inspections archetype
+ # landlord_wall_construction = None
+ # landlord_roof_construction = None
+ # landlord_heating_system = "Main Heating Source"
+ # landlord_existing_pv = None
+ # landlord_property_id = "UPRN"
+ # landlord_sap = "SAP Score"
+ # outcomes_filename = [
+ # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"),
+ # os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"),
+ # ]
+ # outcomes_sheetname = ["Outcomes", "Feedback"]
+ # outcomes_postcode = ["Postcode", "Postcode"]
+ # outcomes_houseno = ["No", "No."]
+ # outcomes_id = [None, None]
+ # outcomes_address = ["Address", "Address"]
+ # master_filepaths = [
+ # os.path.join(data_folder, "ECO 3-Table 1.csv"),
+ # os.path.join(data_folder, "ECO 4-Table 1.csv"),
+ # ]
+ # master_to_asset_list_filepath = None
+ # phase = False
+ # ecosurv_landlords = "eastlight"
+ # asset_list_header = 0
+ # landlord_block_reference = None
+ # master_id_colnames = [None, None]
+ # landlord_sap = None
+
+ # Pickering and Ferens
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens"
+ # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx"
+ # sheet_name = "Sava Intelligent Energy - Prope"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = 'Address'
+ # address1_column = None
+ # address1_method = "house_number_extraction"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = None
+ # landlord_os_uprn = None
+ # landlord_property_type = "Property Type" # Using the inspections property type
+ # landlord_built_form = "Archetype 2"
+ # landlord_wall_construction = None
+ # landlord_roof_construction = None
+ # landlord_heating_system = None
+ # landlord_existing_pv = None
+ # landlord_property_id = "UPRN"
+ # landlord_sap = "SAP Rating (RdSAP 10)"
+ # outcomes_filename = []
+ # outcomes_sheetname = []
+ # outcomes_postcode = []
+ # outcomes_houseno = []
+ # outcomes_id = []
+ # outcomes_address = []
+ # master_filepaths = [
+ # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"),
+ # os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"),
+ # ]
+ # master_to_asset_list_filepath = None
+ # phase = False
+ # ecosurv_landlords = "pickering"
+ # asset_list_header = 0
+ # landlord_block_reference = None
+ # master_id_colnames = [None, None]
+
+ # Colchester
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
+ # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
+ # sheet_name = "Sheet1"
+ # postcode_column = 'Full Address.1'
+ # fulladdress_column = "Full Address"
+ # address1_column = None
+ # address1_method = "first_word"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = "Build Date"
+ # landlord_os_uprn = None
+ # landlord_property_type = "Property Type"
+ # landlord_wall_construction = "Wallinsul"
+ # landlord_heating_system = "HeatSorc"
+ # landlord_existing_pv = None
+ # landlord_property_id = "Property Reference"
+ # outcomes_filename = []
+ # outcomes_sheetname = []
+ # outcomes_postcode = []
+ # outcomes_houseno = []
+ # outcomes_id = []
+ # outcomes_address = []
+ # master_filepaths = []
+ # master_to_asset_list_filepath = None
+ # asset_list_header = 0
+ # landlord_built_form = None
+ # landlord_roof_construction = None
+ # landlord_sap = None
+ # landlord_block_reference = None
+ # phase = False
+ # ecosurv_landlords = None
+ # master_id_colnames = []
+
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot"
+ # data_filename = "EalingFlats.xlsx"
+ # sheet_name = "Sheet1"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = "Address"
+ # address1_column = None
+ # address1_method = "house_number_extraction"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = None
+ # landlord_os_uprn = None
+ # landlord_property_type = None # Using the inspections property type
+ # landlord_built_form = None
+ # landlord_wall_construction = None
+ # landlord_roof_construction = None
+ # landlord_heating_system = None
+ # landlord_existing_pv = None
+ # landlord_property_id = "Property ref"
+ # landlord_sap = None
+ # outcomes_filename = []
+ # outcomes_sheetname = []
+ # outcomes_postcode = []
+ # outcomes_houseno = []
+ # outcomes_id = []
+ # outcomes_address = []
+ # master_filepaths = []
+ # master_to_asset_list_filepath = None
+ # phase = False
+ # ecosurv_landlords = None
+ # asset_list_header = 0
+ # landlord_block_reference = "Block Ref"
+ # master_id_colnames = []
+
+ # Southern - Jan list
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
+ # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
+ # sheet_name = "Jan 2025 additions"
+ # postcode_column = 'Post Code'
+ # fulladdress_column = None
+ # address1_column = "NO."
+ # address1_method = None
+ # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
+ # missing_postcodes_method = None
+ # landlord_year_built = None
+ # landlord_os_uprn = None
+ # landlord_property_type = None # Using the inspections property type
+ # landlord_built_form = None
+ # landlord_wall_construction = None
+ # landlord_roof_construction = None
+ # landlord_heating_system = None
+ # landlord_existing_pv = None
+ # landlord_property_id = "SH Property Reference"
+ # landlord_sap = None
+ # outcomes_filename = [
+ # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
+ # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
+ # ]
+ # outcomes_sheetname = ["Feedback", "Collated"]
+ # outcomes_postcode = ["Poscode", "Postcode"]
+ # outcomes_houseno = ["No.", "No"]
+ # outcomes_id = ["UPRNs", None]
+ # outcomes_address = ["Address", "Address"]
+ # master_filepaths = [
+ # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
+ # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
+ # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
+ # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
+ # ]
+ # master_to_asset_list_filepath = None
+ # phase = False
+ # ecosurv_landlords = "southern"
+ # asset_list_header = 0
+ # landlord_block_reference = None
+ # master_id_colnames = [None, None, None, None]
+
+ # NCHA
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
+ # data_filename = "Energy Information MASTER June 2025.xlsx"
+ # sheet_name = "Data"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = "Address"
+ # address1_column = None
+ # address1_method = "house_number_extraction"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = "Build Date (HAR10)"
+ # landlord_os_uprn = None
+ # landlord_property_type = "Property Type (HAR10)"
+ # landlord_built_form = "Build Form (EPC)"
+ # landlord_wall_construction = "Wall Description"
+ # landlord_roof_construction = None
+ # landlord_heating_system = "HEAT Code"
+ # landlord_existing_pv = None
+ # landlord_property_id = "Place ref"
+ # landlord_sap = "EPC SAP"
+ # outcomes_filename = None
+ # outcomes_sheetname = None
+ # outcomes_postcode = None
+ # outcomes_houseno = None
+ # outcomes_id = None
+ # outcomes_address = None
+ # master_filepaths = []
+ # master_to_asset_list_filepath = None
+ # phase = False
+ # ecosurv_landlords = None
+ # asset_list_header = 0
+ # landlord_block_reference = None
+ # master_id_colnames = []
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
# data_filename = "07.04 CALICO - Final List.xlsx"
@@ -593,6 +923,7 @@ def app():
# We now flag the status of the property
asset_list.label_property_status()
asset_list.analyse_geographies()
+ asset_list.get_work_figures()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py
index 5110fb5f..23ff900a 100644
--- a/asset_list/hubspot/config.py
+++ b/asset_list/hubspot/config.py
@@ -17,7 +17,7 @@ class HubspotProcessStatus(IntEnum):
# The property didn't get access and needs sign off
SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
# The survey has been completed. We don't have any update as to whether the property has been installed
- SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
+ SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF"
# The property turned out to be ineligibile
NOT_VIABLE = 4, "NOT VIABLE"
# The property is with the installer. This will likely be the default for historic programmes
@@ -79,7 +79,7 @@ CRM_UPLOAD_COLUMNS = [
'Last EPC: Room Height ',
'Last EPC: Age Band ', 'Deal Stage ',
'Pipeline ', 'Expected Commencement Date ',
- 'Deal Name ', 'Project Code ',
+ 'Deal Name ', 'Project Code ', 'Postcode ',
'Product ID ', 'Name ', 'Unit price ',
'Quantity ', 'Deal Owner', 'Amount ', 'Installer '
]
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index eed6d7e7..b12f4c04 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -2,6 +2,32 @@ import os
import pandas as pd
from asset_list.AssetList import AssetList
+import re
+
+
+def normalize_uk_phone(number: str | float | int) -> str | None:
+    """Return ``number`` in ``+44`` international form, or None if it is missing or not a plausible UK number."""
+    if pd.isna(number):
+        return None
+
+    # A whole-number float would stringify as e.g. "7911123456.0" and its trailing zero would be kept as a digit.
+    if isinstance(number, float) and number.is_integer():
+        number = int(number)
+    number = re.sub(r"[^\d+]", "", str(number))
+
+    # Handle common short inputs: add '0' if likely missing
+    if re.match(r"^[17]\d{8,9}$", number):
+        number = "0" + number
+
+    # Convert to international format; "0044" must be tested first because it also starts with "0".
+    if number.startswith("0044"):
+        number = "+" + number[2:]
+    elif number.startswith("0"):
+        number = "+44" + number[1:]
+
+    # Must be +44 followed by 9 or 10 digits (some area codes may vary)
+    return number if re.match(r"^\+44\d{9,10}$", number) else None
+
def app():
"""
@@ -18,32 +44,28 @@ def app():
"""
# inputs:
- reconcile_programme = False # If True, the hubspot upload will include all properties with a project code
- customer_domain = "https://sandwell.gov.uk"
- installer_name = "J & J CRUMP"
+ reconcile_programme = True # If True, the hubspot upload will include all properties with a project code
+ customer_domain = "https://ealing.gov.uk"
+ installer_name = "SCIS"
asset_list_filepath = (
- "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
- "Standardised.xlsx"
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
+ "programme.xlsx"
)
- asset_list_sheet_name = "Proposed Program"
- asset_list_header = 1
+ asset_list_sheet_name = "Standardised Asset List"
+ asset_list_header = 0
- contact_details_filepath = (
- "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
- )
- contacts_sheet_name = "Sheet1"
- contacts_landlord_property_id = "landlord_property_id"
+ contact_details_filepath = None
+ contacts_sheet_name = "Sheet 1"
+ contacts_landlord_property_id = "UPRN"
contacts_phone_number_column = "phone_number"
contacts_secondary_phone_number_column = "secondary_phone_number"
contacts_secondary_contact_full_name = "secondary_contact_full_name"
contacts_email_column = "email"
contacts_fullname_column = "fullname"
- contacts_firstname_column = "firstname"
- contacts_lastname_column = "lastname"
+ contacts_firstname_column = "First Name"
+ contacts_lastname_column = "Last Name"
- existing_programme_filepath = (
- "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
- )
+ existing_programme_filepath = None
asset_list = AssetList.load_standardised_asset_list(
asset_list_filepath, asset_list_sheet_name, asset_list_header
@@ -68,12 +90,12 @@ def app():
)
# Remove the existing programme
- existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
- asset_list.hubspot_data = asset_list.hubspot_data[
- ~asset_list.hubspot_data["Domna Property ID "].isin(
- existing_programme['Domna Property ID'].values
- )
- ]
+ # existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
+ # asset_list.hubspot_data = asset_list.hubspot_data[
+ # ~asset_list.hubspot_data["Domna Property ID "].isin(
+ # existing_programme['Domna Property ID'].values
+ # )
+ # ]
# Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
directory, filename = os.path.split(asset_list_filepath)
@@ -89,3 +111,66 @@ def app():
# Just store locally
asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
+
+ # # TODO: Set this up separately, but we associate multiple contacts to the same deal
+ # contact_details = pd.read_csv(
+ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
+ # "Upload/Hubspot/contact "
+ # "details.csv"
+ # )
+ #
+ # # contacts_phone_number_column = "phone_number"
+ # # contacts_secondary_phone_number_column = "secondary_phone_number"
+ # # contacts_secondary_contact_full_name = "secondary_contact_full_name"
+ # # contacts_email_column = "email"
+ # # contacts_fullname_column = "fullname"
+ # # contacts_firstname_column = "First Name"
+ # # contacts_lastname_column = "Last Name"
+ # contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
+ # # If phone number is NaN, we will use the landline number
+ # contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
+ # contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
+ # # If secondary phone number is the same as primary, we remove it
+ # import numpy as np
+ # contact_details["secondary_phone_number"] = np.where(
+ # contact_details["secondary_phone_number"] == contact_details["phone_number"],
+ # np.nan,
+ # contact_details["secondary_phone_number"]
+ # )
+ # contact_details = contact_details[
+ # ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
+ # "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
+ # columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
+ # )
+ # contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
+ # # Format the phone numbers
+ #
+ # contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
+ # normalize_uk_phone)
+ # contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
+ # str).apply(
+ # normalize_uk_phone)
+ #
+ # # Add in the Hubspot deal data
+ # hubspot_data = pd.read_csv(
+ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
+ # "property-status.csv",
+ # encoding="utf-8-sig"
+ # )
+ # # Merge on contact details
+ # contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
+ # contact_details,
+ # how="left",
+ # right_on="landlord_proprty_id",
+ # left_on="Landlord Property ID"
+ # )
+ #
+ # contact_details = contact_details.drop(columns=["landlord_proprty_id"])
+ #
+ # # Store as csv
+ # contact_details.to_csv(
+ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
+ # "Programme Hubspot Upload/Hubspot/"
+ # "contact_details.csv",
+ # index=False, encoding="utf-8-sig"
+ # )
diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py
index 45e45c54..c17e0ed4 100644
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@@ -3,7 +3,7 @@ import numpy as np
STANDARD_BUILT_FORMS = {
"unknown",
# Houses
- "end-terrace", "semi-detached", "detached", "mid-terrace",
+ "end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace",
# Flats
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
}
@@ -358,6 +358,19 @@ BUILT_FORM_MAPPINGS = {
'1983- 90 SEMI DET': 'semi-detached',
'1983-90 MID TERR': 'mid-terrace',
'1976-82 SEMI DET': 'semi-detached',
- 'PRE 1900 MID TERR': 'mid-terrace'
+ 'PRE 1900 MID TERR': 'mid-terrace',
+ None: 'unknown',
+ 'SEMI-DETACHED': 'semi-detached',
+ 'DETACHED': 'detached',
+ 'MID TERRACE': 'mid-terrace',
+ 'END TERRACE': 'end-terrace',
+ 'ENCLOSED MID': 'enclosed mid-terrace',
+
+ 'BUILDING': 'unknown',
+ 'FLAT COMMUNAL FACILITIES': 'unknown',
+ 'MAISONETTE': 'unknown',
+ 'HOUSE': 'unknown',
+ 'FLAT': 'unknown',
+ 'BLOCK': 'unknown'
}
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index 1a46c429..010d49a5 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -28,6 +28,7 @@ STANDARD_HEATING_SYSTEMS = {
"electric underfloor",
"no heating",
"non-electric underfloor",
+ "warm air heating",
}
HEATING_MAPPINGS = {
@@ -326,5 +327,42 @@ HEATING_MAPPINGS = {
'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
'GREENSTAR 30SI COMPACT': 'gas combi boiler',
'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
- 'Not applicable for this asset type': 'unknown'
+ 'Not applicable for this asset type': 'unknown',
+ 'Boiler: F rated Regular Boiler': 'gas condensing boiler',
+ 'Warm Air Systems: Electric warm air: Electricaire system': 'warm air heating',
+ 'Boiler: B rated Combi': 'gas condensing combi',
+ 'Boiler: G rated Regular Boiler': 'gas condensing boiler',
+ 'Electric Storage Systems: Modern (slimline) storage heaters': 'electric storage heaters',
+ 'Boiler: C rated CPSU': 'gas condensing combi',
+ 'Boiler: D rated Regular Boiler': 'gas condensing boiler',
+ 'Warm Air Systems: Gas fired warm air with balanced or open flue: Ducted or stub-ducted, on-off control, '
+ 'pre 1998': 'warm air heating',
+ 'Electric Storage Systems: Integrated storage+direct-acting heater': 'electric storage heaters',
+ 'Boiler: D rated Combi': 'gas condensing combi',
+ 'Heat Pump: (from database)': 'air source heat pump',
+ 'Community Heating Systems: Community CHP and boilers (RdSAP)': 'communal heating',
+ '': 'unknown',
+
+ 'Solid Fuel Boiler': 'solid fuel',
+ 'Heating (Other)': 'other',
+ 'Solid Fuel Fire Only': 'solid fuel',
+ 'No Main Heat Source': 'no heating',
+ 'Electric Programmable': 'electric storage heaters',
+ 'Linked to Communal Boiler': 'communal heating',
+ 'Bio Mass Boiler': 'solid fuel',
+ 'Electric Non Programmable': 'electric storage heaters',
+
+ 'Room heaters, Mains gas': 'room heaters',
+ 'Boiler, Solid fuel': 'solid fuel',
+ 'Room heaters, Electricity': 'room heaters',
+ 'Room heaters, Solid fuel': 'room heaters',
+ 'Boiler, Oil': 'oil boiler',
+ 'Boiler, Biomass': 'boiler - other fuel',
+ 'Community heating, Community (non-gas)': 'communal heating',
+ 'Heat pump (wet), Electricity': 'air source heat pump',
+ 'Community heating, Community (mains gas)': 'communal gas boiler',
+ 'Boiler, Electricity': 'electric boiler',
+ 'Boiler, LPG': 'gas boiler, radiators',
+ 'Boiler, Mains gas': 'gas boiler, radiators',
+ 'Storage heating, Electricity': 'electric storage heaters'
}
diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py
index bdb6580e..caca0cf0 100644
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@@ -256,7 +256,6 @@ PROPERTY_MAPPING = {
'HOUSE (3 STOREY)': 'house',
'FLAT GROUND FLOOR': 'flat',
'FLAT TOP FLOOR': 'flat',
-
'SHARED HOUSE': 'house',
'MAISONETTE': 'maisonette',
'DIRECT ACCESS HOSTEL': 'other',
@@ -266,5 +265,11 @@ PROPERTY_MAPPING = {
'SHOP': 'other',
'Office Block': 'other',
'BLOCK (Non-Communal)': 'block of flats',
- 'Refuge': 'other'
+ 'Refuge': 'other',
+ None: 'unknown',
+ 'HFOP FLAT': 'flat',
+ 'HFOP BEDSIT': 'bedsit',
+ 'LINKED FLAT': 'flat',
+ 'LINKED BUNGALOW': 'bungalow'
+
}
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index 13359ded..66860bec 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -9,6 +9,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
+ "flat insulated",
"unknown insulated",
"unknown",
}
@@ -51,5 +52,127 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'100MM': 'pitched less than 100mm insulation',
'U/K': 'unknown',
'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
- 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation'
+ 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation',
+
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 100mm': 'another dwelling above',
+ 'PitchedNormalNoLoftAccess: 150mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: As Built, PitchedNormalNoLoftAccess: None': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalLoftAccess: 200mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 200mm': 'pitched insulated',
+ 'PitchedNormalNoLoftAccess: 50mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 150mm': 'pitched insulated', 'Flat: None': 'flat unknown insulation',
+ 'Flat: As Built, PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
+ 'PitchedNormalNoLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
+ 'Flat: Unknown, PitchedNormalLoftAccess: 200mm, SameDwellingAbove: Unknown': 'flat unknown insulation',
+ 'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
+ 'PitchedNormalNoLoftAccess: 250mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 300mm': 'another dwelling above',
+ 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: 50mm': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: 100mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 150mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 200mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalNoLoftAccess: 75mm': 'pitched less than 100mm insulation',
+ 'Flat: As Built, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 150mm, SameDwellingAbove': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalLoftAccess: 100mm': 'flat unknown insulation',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 200mm': 'pitched insulated',
+ 'PitchedNormalNoLoftAccess: 300mm': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: 150mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
+ 'PitchedNormalNoLoftAccess: 200mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'PitchedNormalLoftAccess: None': 'pitched less than 100mm insulation',
+ 'Flat: As Built': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 250mm': 'pitched less than 100mm insulation',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 50mm': 'another dwelling above',
+ 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched '
+ 'insulated',
+ 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated',
+ 'Flat: 50mm': 'flat unknown insulation',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: None': 'another dwelling above',
+ 'PitchedNormalNoLoftAccess: None': 'pitched uninsulated',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 25mm': 'another dwelling above',
+ 'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above',
+ 'Flat: As Built, PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
+ 'Flat: Unknown, PitchedNormalLoftAccess: 75mm, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: Unknown': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 300mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
+ 'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalLoftAccess: 150mm': 'another dwelling above',
+ 'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
+ 'Flat: As Built, PitchedNormalLoftAccess: 300mm': 'flat unknown insulation', 'Flat: 100mm': 'flat unknown insulation',
+ 'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 12mm': 'pitched less than 100mm insulation',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 150mm': 'another dwelling above',
+ 'PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
+
+ 'PitchedNormalLoftAccess: 25mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
+
+ 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None, PitchedNormalNoLoftAccess: Unknown': 'pitched '
+ 'insulated',
+ 'PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
+
+ 'PitchedNormalNoLoftAccess: Unknown, SameDwellingAbove: Unknown': 'pitched no access to loft',
+ 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: As Built': 'pitched less than 100mm insulation',
+ 'PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
+ 'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 50mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
+ 'Flat: 100mm, Flat: As Built': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: No Insulation': 'another dwelling above',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 300mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 100mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 270mm': 'pitched insulated',
+ 'PitchedNormalNoLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 300mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: 250mm': 'flat unknown insulation',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: 50mm': 'flat unknown insulation',
+ 'Flat: As Built, PitchedNormalLoftAccess: 75mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched insulated',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 150mm': 'another dwelling above',
+ 'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above',
+ 'Flat: As Built, PitchedNormalNoLoftAccess: 200mm': 'flat unknown insulation',
+ 'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation',
+ 'Flat: As Built, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: No Insulation':
+ 'another dwelling above',
+ 'Flat: As Built, PitchedNormalLoftAccess: 50mm': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 25mm': 'pitched less than 100mm insulation',
+ 'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
+ 'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown, PitchedThatched: 25mm': 'pitched insulated',
+ 'Flat: 150mm+': 'flat insulated',
+ 'Flat: Unknown, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated',
+ 'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
+ 'PitchedNormalLoftAccess: 250mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 75mm': 'pitched insulated',
+ 'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated',
+ 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 200mm': 'another dwelling above',
+
+ 'PitchedNormalNoLoftAccess: Unknown': 'pitched no access to loft',
+ 'PitchedNormalLoftAccess: Unknown': 'pitched unknown insulation',
+ 'AnotherDwellingAbove: Unknown': 'another dwelling above'
+
}
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
index 2e0a332f..245b7f88 100644
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@@ -7,122 +7,163 @@ STANDARD_WALL_CONSTRUCTIONS = {
"uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation",
# Timber Frame
"timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame",
- "system built", "granite or whinstone", "other",
- "unknown", "sandstone or limestone",
+ # System
+ "system built unknown insulation", "insulated system built", "uninsulated system built",
+ # Granite or Whinstone
+ "granite or whinstone unknown insulation", "insulated granite or whinstone", "uninsulated granite or whinstone",
+ # Sandstone or Limestone
+ "sandstone or limestone unknown insulation", "insulated sandstone or limestone",
+ "uninsulated sandstone or limestone",
+ # Other
+ "other",
"cob",
"new build - average thermal transmittance",
}
WALL_CONSTRUCTION_MAPPINGS = {
"New Build - Average Thermal Transmittance": "new build - average thermal transmittance",
- 'Average thermal transmittance 0.25 W/m?K': 'unknown',
+ 'Average thermal transmittance 0.25 W/m?K': 'new build - average thermal transmittance',
'Cavity wall, as built, insulated (assumed)': 'filled cavity',
'Average thermal transmittance 0.31 W/m?K': 'unknown',
'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
- 'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown',
- 'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown',
- 'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown',
- 'Average thermal transmittance 0.16 W/m?K': 'unknown',
- 'Average thermal transmittance 0.27 W/m²K': 'unknown',
- 'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
- 'Average thermal transmittance 0.18 W/m?K': 'unknown',
- 'Granite or whin, with internal insulation': 'granite or whinstone',
- "Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
- 'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
- 'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
- 'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
- 'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown',
- 'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
- 'Average thermal transmittance 0.33 W/m?K': 'unknown',
+ 'Average thermal transmittance 0.30 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.28 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.25 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.21 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.20 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.29 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.16 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.27 W/m²K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.15 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.23 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.18 W/m?K': 'new build - average thermal transmittance',
+ 'Granite or whin, with internal insulation': 'insulated granite or whinstone',
+ "Granite or whinstone, as built, insulated (assumed)": "insulated granite or whinstone",
+ 'Average thermal transmittance 0.22 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.24 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.16 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.35 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.26 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.62 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.64 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.61 W/m?K': 'new build - average thermal transmittance',
+ 'Sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
+ 'Average thermal transmittance 0.33 W/m?K': 'new build - average thermal transmittance',
'Cavity wall,': "cavity unknown insulation",
'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
- 'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown',
- 'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown',
- 'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown',
- 'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown',
- 'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown',
- 'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown',
- 'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
- 'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
+ 'Average thermal transmittance 0.29 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.32 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.19 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.27 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.22 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.38 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.26 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.27 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.18 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance = 0.27 W/m?K': 'new build - average thermal transmittance',
+ 'Cavity wall, with external insulation': 'filled cavity',
+ 'Average thermal transmittance 0.21 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.23 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.20 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.32 W/m?K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.24 W/m-¦K': 'new build - average thermal transmittance',
'Cavity wall, with internal insulation': 'filled cavity',
- 'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
+ 'Average thermal transmittance 0.17 W/m-¦K': 'new build - average thermal transmittance',
+ 'Average thermal transmittance 0.28 W/m?K': 'new build - average thermal transmittance',
'new build - average thermal transmittance': 'new build - average thermal transmittance',
- 'average thermal transmittance 0.25 w/m?k': 'unknown',
+ 'average thermal transmittance 0.25 w/m?k': 'new build - average thermal transmittance',
'cavity wall, as built, insulated (assumed)': 'filled cavity',
- 'average thermal transmittance 0.31 w/m?k': 'unknown',
+ 'average thermal transmittance 0.31 w/m?k': 'new build - average thermal transmittance',
'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
- 'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
- 'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
- 'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
- 'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m²k': 'unknown',
- 'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
- 'average thermal transmittance 0.18 w/m?k': 'unknown',
- 'granite or whin, with internal insulation': 'granite or whinstone',
- 'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
- 'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
- 'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
- 'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
- 'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
- 'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation",
+ 'average thermal transmittance 0.30 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.28 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.25 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.21 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.20 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.29 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.16 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.27 w/m²k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.15 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.23 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.18 w/m?k': 'new build - average thermal transmittance',
+ 'granite or whin, with internal insulation': 'insulated granite or whinstone',
+ 'average thermal transmittance 0.22 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.24 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.16 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.35 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.26 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.62 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.64 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.61 w/m?k': 'new build - average thermal transmittance',
+ 'sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
+ 'average thermal transmittance 0.33 w/m?k': 'new build - average thermal transmittance',
+ 'cavity wall,': "cavity unknown insulation",
'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
- 'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
- 'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
- 'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
- 'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
- 'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
- 'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
- 'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
- 'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
- 'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
- 'average thermal transmittance 0.28 w/m?k': 'unknown',
+ 'average thermal transmittance 0.29 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.32 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.19 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.27 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.22 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.38 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.26 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.27 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.18 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance = 0.27 w/m?k': 'new build - average thermal transmittance',
+ 'cavity wall, with external insulation': 'filled cavity',
+ 'average thermal transmittance 0.21 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.23 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.20 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.32 w/m?k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.24 w/m-¦k': 'new build - average thermal transmittance',
+ 'cavity wall, with internal insulation': 'filled cavity',
+ 'average thermal transmittance 0.17 w/m-¦k': 'new build - average thermal transmittance',
+ 'average thermal transmittance 0.28 w/m?k': 'new build - average thermal transmittance',
'Cavity wall, filled cavity': 'filled cavity',
'Cavity wall, filled cavity and external insulation': 'filled cavity',
- 'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone',
+ 'Granite or whinstone, as built, no insulation (assumed)': 'uninsulated granite or whinstone',
'Solid brick, as built, insulated (assumed)': 'insulated solid brick',
'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick',
'Solid brick, with external insulation': 'insulated solid brick',
'Solid brick, with internal insulation': 'insulated solid brick',
- 'System built, as built, insulated (assumed)': 'system built',
- 'System built, as built, no insulation (assumed)': 'system built',
- 'System built, with external insulation': 'system built',
- 'System built, with internal insulation': 'system built',
- 'Timber frame, as built, insulated (assumed)': 'timber frame',
- 'Timber frame, as built, no insulation (assumed)': 'timber frame',
- 'Timber frame, as built, partial insulation (assumed)': 'timber frame',
- 'Timber frame, with additional insulation': 'timber frame',
+ 'System built, as built, insulated (assumed)': 'insulated system built',
+ 'System built, as built, no insulation (assumed)': 'uninsulated system built',
+ 'System built, with external insulation': 'insulated system built',
+ 'System built, with internal insulation': 'insulated system built',
+ 'Timber frame, as built, insulated (assumed)': 'insulated timber frame',
+ 'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame',
+ 'Timber frame, as built, partial insulation (assumed)': 'insulated timber frame',
+ 'Timber frame, with additional insulation': 'insulated timber frame',
'CAVITY': 'cavity unknown insulation',
'COMB': 'unknown',
'NONE': 'unknown',
'NOTKNOWN': 'unknown',
'SOLID': 'solid brick unknown insulation',
np.nan: 'unknown',
- 'RENDER/TIMBER FRAME': 'timber frame',
- 'SYSTEM BUILT': 'system built',
+ 'RENDER/TIMBER FRAME': 'timber frame unknown insulation',
+ 'SYSTEM BUILT': 'system built unknown insulation',
'PCC PANELS': 'other',
'NOT APPLICABLE - FLAT': 'unknown',
- 'BRICK/TIMBER FRAME': 'timber frame',
+ 'BRICK/TIMBER FRAME': 'timber frame unknown insulation',
'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
- 'STONE SOLID': 'sandstone or limestone',
- 'EXT CLADDING SYSTEM': 'system built',
+ 'STONE SOLID': 'sandstone or limestone unknown insulation',
+ 'EXT CLADDING SYSTEM': 'system built unknown insulation',
'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
-
'Cavity Filled cavity (with internal/external)': 'filled cavity',
'ND (inferred) Filled cavity': 'filled cavity',
'Cavity Filled cavity': 'filled cavity',
'Cavity Unknown insulation': 'cavity unknown insulation',
- 'Timber frame As-built': 'timber frame',
- 'System build Unknown insulation': 'system built',
+ 'Timber frame As-built': 'uninsulated timber frame',
+ 'System build Unknown insulation': 'system built unknown insulation',
'Cavity As-built': 'uninsulated cavity',
- 'System build External': 'system built',
+ 'System build External': 'insulated system built',
'ND (inferred) ND (inferred)': 'unknown',
'Solid brick External': 'insulated solid brick',
'Cavity External': 'filled cavity',
- 'System build As-built': 'system built',
+ 'System build As-built': 'uninsulated system built',
'Solid brick Internal': 'insulated solid brick',
'Cavity Internal': 'filled cavity',
- 'System build Internal': 'system built',
- 'Solid brick As-built': 'solid brick unknown insulation',
-
+ 'System build Internal': 'insulated system built',
+ 'Solid brick As-built': 'uninsulated solid brick',
'Cavity ': 'cavity unknown insulation',
'Solid brick ': 'solid brick unknown insulation',
'Timber frame Timber frame (good insulation)': 'insulated timber frame',
@@ -141,91 +182,90 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Cavity: Unknown': 'cavity unknown insulation',
'Cavity: AsBuilt (Post 1995)': 'filled cavity',
'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation',
- 'SystemBuilt: AsBuilt': 'system built',
- 'TimberFrame: AsBuilt': "timber frame unknown insulation",
- 'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation',
+ 'SystemBuilt: AsBuilt': 'uninsulated system built',
+ 'TimberFrame: AsBuilt': "uninsulated timber frame",
+ 'Cavity: AsBuilt (1983-1995)': 'filled cavity',
'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity',
- 'SolidBrick: AsBuilt': 'solid brick unknown insulation',
+ 'SolidBrick: AsBuilt': 'uninsulated solid brick',
'Cavity: FilledCavity': 'filled cavity',
'SolidBrick: Internal': 'insulated solid brick',
'Cavity: External': 'filled cavity',
- 'Sandstone: Internal': 'sandstone or limestone',
- 'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation',
- 'System build': 'system built',
+ 'Sandstone: Internal': 'insulated sandstone or limestone',
+ 'Cavity: AsBuilt (Pre 1976)': 'uninsulated cavity',
+ 'System build': 'system built unknown insulation',
'Solid brick': 'solid brick unknown insulation',
- 'Stone': 'sandstone or limestone',
+ 'Stone': 'sandstone or limestone unknown insulation',
'Timber frame': 'timber frame unknown insulation',
'2017 onwards': 'new build - average thermal transmittance',
'ND (inferred)': 'unknown',
- 'Flat / maisonette': 'other',
- 'Other': 'other',
+ 'Flat / maisonette': 'unknown',
+ 'Other': 'unknown',
'Timber Frame': 'timber frame unknown insulation',
'Cavity Wall': 'cavity unknown insulation',
- 'Non-Traditional': 'system built',
- 'PRC': 'system built',
- 'Cross Wall': 'system built',
+ 'Non-Traditional': 'system built unknown insulation',
+ 'PRC': 'system built unknown insulation',
+ 'Cross Wall': 'system built unknown insulation',
'Solid Wall': 'solid brick unknown insulation',
'Traditional': 'unknown',
'Solid': 'solid brick unknown insulation',
- 'Wates no fines': 'system built',
- 'Concrete Frame': 'system built',
- 'PRCWATES': 'system built',
- 'Refurbished Cornish': 'system built',
+ 'Wates no fines': 'system built unknown insulation',
+ 'Concrete Frame': 'system built unknown insulation',
+ 'PRCWATES': 'system built unknown insulation',
+ 'Refurbished Cornish': 'system built unknown insulation',
'Bailey Stratton': 'other',
- 'Refurbished Reema': 'system built',
- 'PRCREEMA': 'system built',
- 'Trustsell Type': 'system built',
+ 'Refurbished Reema': 'system built unknown insulation',
+ 'PRCREEMA': 'system built unknown insulation',
+ 'Trustsell Type': 'system built unknown insulation',
'Petra Nissan': 'unknown',
- 'Reinstated Airey': 'system built',
- 'Refurbished Airey': 'system built',
+ 'Reinstated Airey': 'system built unknown insulation',
+ 'Refurbished Airey': 'system built unknown insulation',
# From Abri- slightly unclear on types but not a large portion of the data
- 'No Fines Type': 'system built',
- 'Refurbished Unity': 'system built',
+ 'No Fines Type': 'system built unknown insulation',
+ 'Refurbished Unity': 'system built unknown insulation',
'Timber Framed': 'timber frame unknown insulation',
- 'Refurbished Woolaway': 'system built',
+ 'Refurbished Woolaway': 'system built unknown insulation',
'Modern Methods of Construction': 'other',
- 'BISF - Brit Iron & Steel Federation': 'system built',
- 'Steel Framed': 'system built',
+ 'BISF - Brit Iron & Steel Federation': 'system built unknown insulation',
+ 'Steel Framed': 'system built unknown insulation',
'Timber Framed with confirmed Fire Stopping': 'timber frame unknown insulation',
- 'Sipporex': 'system built',
+ 'Sipporex': 'system built unknown insulation',
- 'Wates': 'system built',
- 'Bryants': 'system built',
- 'Gregory (Crosswall)': 'system built',
- 'Rsmit': 'system built',
- 'Dorman Long': 'system built',
- 'Tarmac': 'system built',
- 'RBIS': 'system built',
- 'Five Oaks': 'system built',
+ 'Wates': 'system built unknown insulation',
+ 'Bryants': 'system built unknown insulation',
+ 'Gregory (Crosswall)': 'system built unknown insulation',
+ 'Rsmit': 'system built unknown insulation',
+ 'Dorman Long': 'system built unknown insulation',
+ 'Tarmac': 'system built unknown insulation',
+ 'RBIS': 'system built unknown insulation',
+ 'Five Oaks': 'system built unknown insulation',
'Not known': 'unknown',
- 'Smiths': 'system built',
- 'Kendrick': 'system built',
- 'IDC': 'system built',
- 'Wimpey (Part Brick)': 'system built',
- 'Whitehall': 'system built',
- 'Wimpey': 'system built',
- 'Bison': 'system built',
- 'Zinns': 'system built',
- 'Bisf': 'system built',
- 'Integer': 'system built',
- 'Cornish': 'system built',
- 'Rwate': 'system built',
- 'Hill Presweld Steel': 'system built',
+ 'Smiths': 'system built unknown insulation',
+ 'Kendrick': 'system built unknown insulation',
+ 'IDC': 'system built unknown insulation',
+ 'Wimpey (Part Brick)': 'system built unknown insulation',
+ 'Whitehall': 'system built unknown insulation',
+ 'Wimpey': 'system built unknown insulation',
+ 'Bison': 'system built unknown insulation',
+ 'Zinns': 'system built unknown insulation',
+ 'Bisf': 'system built unknown insulation',
+ 'Integer': 'system built unknown insulation',
+ 'Cornish': 'system built unknown insulation',
+ 'Rwate': 'system built unknown insulation',
+ 'Hill Presweld Steel': 'system built unknown insulation',
'Cavity Filled Cavity': 'filled cavity',
'Cavity Unknown': 'cavity unknown insulation',
'Cavity Filled Cavity (internal)': 'filled cavity',
'': 'unknown',
'Cavity Internal Insulation': 'filled cavity',
'Cavity As Built': "uninsulated cavity",
- 'Non Trad Large Panel System': 'system built',
- 'Non Trad Cornish': 'system built',
- 'Non Trad Reema': 'system built',
+ 'Non Trad Large Panel System': 'system built unknown insulation',
+ 'Non Trad Cornish': 'system built unknown insulation',
+ 'Non Trad Reema': 'system built unknown insulation',
'Traditional Cavity Brickwork': 'cavity unknown insulation',
- 'System build (undefined)': 'system built',
- 'Non Trad Wimpey': 'system built',
- 'Non Trad Wates': 'system built',
-
+ 'System build (undefined)': 'system built unknown insulation',
+ 'Non Trad Wimpey': 'system built unknown insulation',
+ 'Non Trad Wates': 'system built unknown insulation',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 250MM': 'filled cavity',
@@ -238,17 +278,60 @@ WALL_CONSTRUCTION_MAPPINGS = {
'CAVITY A/B 270MM': "uninsulated cavity",
'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
'CAVITY EWI': 'filled cavity',
- 'SANDSTONE/CAVITY EXT': 'sandstone or limestone',
- 'SYSTEM BUILD 100MM EWI': 'system built',
+ 'SANDSTONE/CAVITY EXT': 'sandstone or limestone unknown insulation',
+ 'SYSTEM BUILD 100MM EWI': 'insulated system built',
'CAVITY A/B 260MM': "uninsulated cavity",
'CAVITY A/B 270MM': "uninsulated cavity",
'CAVITY A/B 250MM': "uninsulated cavity",
- 'System': 'system built',
- 'Sandstone/Limestone': 'sandstone or limestone',
- 'No Fines': 'system built',
- 'Granite/Whinstone': 'granite or whinstone',
+ 'System': 'system built unknown insulation',
+ 'Sandstone/Limestone': 'sandstone or limestone unknown insulation',
+ 'No Fines': 'system built unknown insulation',
+ 'Granite/Whinstone': 'granite or whinstone unknown insulation',
'Not applicable to this asset type': 'unknown',
- 'Steel Frame': 'system built',
+ 'Steel Frame': 'system built unknown insulation',
'Solid Wall As Built': 'uninsulated solid brick',
- 'Solid As Built': 'uninsulated solid brick'
+ 'Solid As Built': 'uninsulated solid brick',
+ 'Cavity: FilledCavity, Cavity: Unknown': 'filled cavity',
+ 'Cavity: AsBuilt (Pre 1976), TimberFrame: Unknown': 'uninsulated cavity',
+ 'SolidBrick: AsBuilt, SolidBrick: Unknown': 'uninsulated solid brick',
+ 'Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity',
+ 'Cavity: AsBuilt (Pre 1976), SolidBrick: Unknown': 'uninsulated cavity',
+ 'Cavity: FilledCavity, TimberFrame: Unknown': 'filled cavity',
+ 'Cavity: AsBuilt (1976-1982), Cavity: Unknown': 'uninsulated cavity',
+ 'Cavity: Unknown, SolidBrick: AsBuilt': 'cavity unknown insulation',
+ 'Cavity: AsBuilt (1976-1982), Cavity: FilledCavity': 'filled cavity',
+ 'Cavity: External, Cavity: FilledCavity': 'filled cavity',
+ 'Cavity: AsBuilt (Post 1995), TimberFrame: AsBuilt': 'filled cavity',
+ 'TimberFrame: AsBuilt, TimberFrame: Internal': 'timber frame unknown insulation',
+ 'GraniteOrWhinstone: AsBuilt': 'uninsulated granite or whinstone',
+ 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity',
+ 'Cavity: AsBuilt (Pre 1976), Cavity: FilledCavity': 'filled cavity',
+ 'SolidBrick: AsBuilt, SolidBrick: External': 'insulated solid brick',
+ 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity': 'filled cavity',
+ 'Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity',
+ 'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity',
+ 'Cavity: AsBuilt (Pre 1976), SolidBrick: AsBuilt': 'uninsulated cavity',
+ 'Cavity: AsBuilt (1976-1982), SolidBrick: AsBuilt': 'filled cavity',
+
+    'Cavity: FilledCavity, SolidBrick: AsBuilt': 'filled cavity',
+    'SolidBrick: External': 'insulated solid brick',
+    'Cavity: FilledCavity, Cavity: Internal': 'filled cavity',
+    'Cavity: External, SolidBrick: AsBuilt': 'filled cavity',
+    'SolidBrick: AsBuilt, TimberFrame: AsBuilt': 'uninsulated solid brick',
+    'Cavity: FilledCavity, SystemBuilt: AsBuilt': 'filled cavity',
+    'Cavity: AsBuilt (1976-1982), SystemBuilt: AsBuilt': 'uninsulated system built',  # as for 'SystemBuilt: AsBuilt'
+    'Cavity: AsBuilt (Post 1995), SolidBrick: AsBuilt': 'filled cavity',
+    'Cavity: AsBuilt (1983-1995), TimberFrame: AsBuilt': 'filled cavity',
+    'SystemBuilt: AsBuilt, TimberFrame: AsBuilt': 'uninsulated system built',
+    'TimberFrame: Internal': 'insulated timber frame',
+    'Cavity: Internal': 'filled cavity',
+    'SystemBuilt: External': 'insulated system built',  # a system build with external insulation, not a cavity wall
+    'Cavity: AsBuilt (Pre 1976), SystemBuilt: AsBuilt': 'uninsulated cavity',
+    'SystemBuilt: Internal': 'insulated system built',
+    'Cavity: AsBuilt (1983-1995), SolidBrick: AsBuilt': 'solid brick unknown insulation',
+    'Cavity: AsBuilt (Pre 1976), TimberFrame: AsBuilt': 'timber frame unknown insulation',
+    'SolidBrick: AsBuilt, SolidBrick: Internal': 'uninsulated solid brick',
+    'Cavity: FilledCavity, TimberFrame: AsBuilt': 'filled cavity',
+    'Cavity: FilledCavity, SolidBrick: AsBuilt, SolidBrick: Internal': 'filled cavity',
+    'Cavity: Internal, SolidBrick: AsBuilt': 'filled cavity',
}
diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt
index 99943397..b68706be 100644
--- a/asset_list/requirements.txt
+++ b/asset_list/requirements.txt
@@ -6,7 +6,10 @@ epc-api-python==1.0.2
thefuzz
boto3
openpyxl
-openai
+openai>=1.3.5
tiktoken
msgpack
-beautifulsoup4
\ No newline at end of file
+beautifulsoup4
+pydantic>=1.10.7
+typing-extensions>=4.5.0
+requests>=2.28.2
diff --git a/asset_list/utils.py b/asset_list/utils.py
index 1678b8e9..fe2b7d14 100644
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@@ -1,5 +1,5 @@
import time
-import numpy as np
+import random
import pandas as pd
from backend.SearchEpc import SearchEpc
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
@@ -37,7 +37,9 @@ def get_data(
"mid-terrace": "Mid-Terrace",
"end-terrace": "End-Terrace",
"semi-detached": "Semi-Detached",
- "detached": "Detached"
+ "detached": "Detached",
+ "enclosed end-terrace": "End-Terrace",
+ "enclosed mid-terrace": "Mid-Terrace",
}
epc_data = []
@@ -101,7 +103,6 @@ def get_data(
else:
# Try splitting on space
add1 = full_address.split(" ")[0].strip()
-
else:
add1 = str(house_number)
searcher = SearchEpc(
@@ -172,7 +173,7 @@ def get_data(
find_epc_data = {}
except Exception as e:
raise Exception(f"Error retrieving FindMyEPC data: {e}")
- time.sleep(np.random.uniform(0.1, 1))
+ time.sleep(random.sample(range(50, 100), 1)[0] / 100)
epc = {
row_id_name: home[row_id_name],
@@ -182,6 +183,11 @@ def get_data(
}
epc_data.append(epc)
+
+ if len(epc_data) % 50 == 0 and len(epc_data) > 0:
+ logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
+ time.sleep(10)
+
except Exception as e:
errors.append(home[row_id_name])
time.sleep(5)
diff --git a/backend/Property.py b/backend/Property.py
index 91c1265a..22eb2fc3 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -217,6 +217,9 @@ class Property:
self.eco4_eligibility = None
self.whlg_eligibility = None
+ # Ventilation
+ self.has_ventilation = self.identify_ventilation()
+
@classmethod
def extract_kwargs(cls, kwargs):
"""
@@ -1197,7 +1200,7 @@ class Property:
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
- if self.main_fuel["fuel_type"] == "mains gas":
+ if self.main_fuel["fuel_type"] in ["mains gas", None]: # We assume when None as it's unknown
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
raise Exception("Implement me")
@@ -1233,6 +1236,13 @@ class Property:
if "air_source_heat_pump" not in measures:
return False
+ # If we have a house over a floor area threshold, we recommend an ASHP
+ if (
+ self.data["property-type"] in ["House", "Bungalow"] and
+ self.floor_area > assumptions.ASHP_FLOOR_AREA_THRESHOLD
+ ):
+ return True
+
suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [
"Detached", "Semi-Detached", "End-Terrace",
]
@@ -1342,3 +1352,12 @@ class Property:
self.gbis_eligibiltiy = funding_calulator.gbis_eligibiltiy
self.eco4_eligibility = funding_calulator.eco4_eligibility
self.whlg_eligibility = funding_calulator.whlg_eligibility
+
+    def identify_ventilation(self):
+        """Return True when self.data reports one of the mechanical ventilation descriptions listed below."""
+        ventilation_descriptions = [
+            'mechanical, extract only',
+            'mechanical, supply and extract'
+        ]
+        # .get(): this is called from __init__, so a record without the field must give False, not a KeyError
+        return self.data.get("mechanical-ventilation") in ventilation_descriptions
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index f1090ef3..d36266d3 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -58,6 +58,19 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
"Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1},
"Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85},
"From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85},
+ "Room heaters, coal": {"fuel": "Coal", "cop": 0.85},
+ "Electric underfloor heating, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
+ 'Room heaters, electric, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85},
+ 'Boiler and radiators, mains gas, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85},
+ 'Room heaters, electric, Electric storage heaters': {"fuel": "Electricity", "cop": 1},
+ "Boiler and radiators, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85},
+ "Boiler and radiators, anthracite": {"fuel": "Anthracite", "cop": 0.85},
+ 'Electric immersion, off-peak, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
+ 'Ground source heat pump, radiators, electric': {
+ "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
+ },
+ 'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
+ "Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
@@ -65,3 +78,6 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
measures_needing_ventilation = [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]
+
+# If we have a property beyond this size, we assume it's likely large enough to have an ASHP
+ASHP_FLOOR_AREA_THRESHOLD = 120 # m2
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 48300f2a..6b8b192d 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -96,3 +96,7 @@ class PlanTriggerRequest(BaseModel):
# When performing a remote assessment, if this has been set, it will allow the engine to
# pull data from the find my epc website, to utilise as part of a remote assessment
event_type: Optional[Literal["remote_assessment"]] = None
+
+ # If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing
+ # scores to drop by a few points
+ simulate_sap_10: Optional[bool] = False
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index 5316fd03..d631e349 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -30,7 +30,6 @@ import backend.app.assumptions as assumptions
from backend.ml_models.api import ModelApi
from backend.Property import Property
-from backend.Funding import Funding
from backend.apis.GoogleSolarApi import GoogleSolarApi
from recommendations.optimiser.CostOptimiser import CostOptimiser
@@ -507,7 +506,7 @@ async def model_engine(body: PlanTriggerRequest):
)
# if we have a remote assment data type, we pull the additional data and include it
- if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
+ if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")):
logger.info("Retrieving find my epc data")
try:
property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
@@ -728,7 +727,8 @@ async def model_engine(body: PlanTriggerRequest):
# Additionally, if we have required measures, they should also be included. Therefore
# we can discount the number of points required to get to the target SAP band (or increase)
# in the case of ventilation
- needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation)
+ needs_ventilation = any(
+ x in property_measure_types for x in assumptions.measures_needing_ventilation) and not p.has_ventilation
input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation)
@@ -772,6 +772,10 @@ async def model_engine(body: PlanTriggerRequest):
epc_to_sap_lower_bound(body.goal_value) - current_sap_points
) - fixed_gain
+ if body.simulate_sap_10:
+ # We add 3 additional SAP points to the required gain to account for SAP 10
+ sap_gain += 3
+
if not body.optimise:
if body.goal != "Increasing EPC":
raise NotImplementedError("Only EPC optimisation is currently supported")
@@ -826,7 +830,11 @@ async def model_engine(body: PlanTriggerRequest):
)
# If wall insulation is selected, we also include mechanical ventilation as a best practice measure
- if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation):
+ ventilation_selected = [
+ r for r in solution if "+mechanical_ventilation" in r["type"]
+ ]
+ if (any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation) or
+ len(ventilation_selected)):
ventilation_rec = next(
(r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
None
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index b22837d8..4291b1d1 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -28,8 +28,8 @@ class AnnualBillSavings:
# Latest price cap figures from Ofgem are for April 2024
# https://www.ofgem.gov.uk/energy-price-cap
- ELECTRICITY_PRICE_CAP = 0.2486
- GAS_PRICE_CAP = 0.0634
+ ELECTRICITY_PRICE_CAP = 0.2573
+ GAS_PRICE_CAP = 0.0633
# This is the most recent export payment figure, at 9.28p/kWh
# Smart export guarantee rates can be found here:
# https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
@@ -39,8 +39,8 @@ class AnnualBillSavings:
PRICE_FACTOR = 0.09549999999999999
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
- DAILY_STANDARD_CHARGE_GAS = 0.3165
- DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097
+ DAILY_STANDARD_CHARGE_GAS = 0.2982
+ DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137
# Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
# For July 2024. These quotes are based on the east midlands region, so we
diff --git a/etl/customers/Brentwood/compile_new_asset_list.py b/etl/customers/Brentwood/compile_new_asset_list.py
new file mode 100644
index 00000000..e3ced5ab
--- /dev/null
+++ b/etl/customers/Brentwood/compile_new_asset_list.py
@@ -0,0 +1,28 @@
+"""
+Brentwood sent us a new asset list in July 2025. This script combines it with selected columns from the old asset
+list, so we have a single picture
+"""
+
+import pandas as pd
+
+PROGRAMME_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/"
+
+# Columns carried over from the old asset list ("UPRN" is the join key)
+CARRIED_OVER_COLUMNS = [
+    "UPRN", "Asset Type", "Year Built", "Dwelling", "Bedrooms", "Ownership", 'Asbestos Full Survey',
+    'Stock Condition Survey', 'Cat', 'Heating', 'WFT Findings', 'ECO Eligibility', 'CIGA Requested',
+    'CIGA Guarantee', 'ECO Survey completed',
+]
+
+# header=1: the column names of the new workbook are read from the second row of the sheet
+latest = pd.read_excel(PROGRAMME_DIR + "All Assets 29.05.2025.xlsx", sheet_name="Sheet1", header=1)
+previous = pd.read_excel(PROGRAMME_DIR + "BRENTWOOD Asset list.xlsx", sheet_name="Asset List")
+
+# Left join: every row of the new list is kept, enriched where its UPRN also appears in the old list
+compiled = latest.merge(previous[CARRIED_OVER_COLUMNS], how="left", on="UPRN")
+
+# Rows with no finding, including properties that are not on the old list, are marked as not inspected
+compiled["WFT Findings"] = compiled["WFT Findings"].fillna("Not Inspected")
+
+# Store this data
+compiled.to_excel(PROGRAMME_DIR + "20250710 Asset List Brentwood.xlsx", index=False)
diff --git a/etl/customers/Colchester/July 2025 Finalised Route.py b/etl/customers/Colchester/July 2025 Finalised Route.py
new file mode 100644
index 00000000..f3ecf2d9
--- /dev/null
+++ b/etl/customers/Colchester/July 2025 Finalised Route.py
@@ -0,0 +1,30 @@
+"""
+Attach selected columns of the amended retro team list to the July 2025 cavity and solar routes, and write out the
+properties of that list which appear on neither route.
+"""
+
+import pandas as pd
+
+PROJECT_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised Project/"
+STANDARDISED_WORKBOOK = PROJECT_DIR + "20250708 Colchester Borough Homes- Standardised.xlsx"
+# Columns copied onto each route; "URPN" is the column the routes' landlord_property_id is matched against
+COMMENT_COLUMNS = ["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)']
+
+comments_df = pd.read_excel(PROJECT_DIR + "CBH_RetroTeamList_amended_25-06-05.xlsx")
+cavity_route = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="July 2025 Route - Cavity")
+solar_route = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="July 2025 Route - Solar")
+
+# Merge on the comments
+comments = comments_df[COMMENT_COLUMNS].copy()
+cavity_route = cavity_route.merge(comments, left_on="landlord_property_id", right_on="URPN", how="left")
+solar_route = solar_route.merge(comments, left_on="landlord_property_id", right_on="URPN", how="left")
+
+# Get properties that are not on either route
+on_cavity_route = comments_df["URPN"].isin(cavity_route["landlord_property_id"])
+on_solar_route = comments_df["URPN"].isin(solar_route["landlord_property_id"])
+not_on_routes = comments_df[~on_cavity_route & ~on_solar_route]
+
+# Store the properties on neither route, then save the two routes
+not_on_routes.to_excel(PROJECT_DIR + "Properties not on routes.xlsx", index=False)
+cavity_route.to_excel(PROJECT_DIR + "Cavity Route.xlsx", index=False)
+solar_route.to_excel(PROJECT_DIR + "Solar Route.xlsx", index=False)
diff --git a/etl/customers/acis/solid_wall_funding.py b/etl/customers/acis/solid_wall_funding.py
new file mode 100644
index 00000000..5515b29c
--- /dev/null
+++ b/etl/customers/acis/solid_wall_funding.py
@@ -0,0 +1,128 @@
+"""
+Estimate ECO4 / GBIS funding scores for the ACIS solid wall properties.
+
+Each property gets the SAP points of its first wall insulation recommendation (the median across properties when it
+has none), the resulting post-works SAP band, and the score looked up for that band from the full project matrix or,
+for properties placed under GBIS, from the partial project matrix.
+"""
+import os
+import pandas as pd
+import numpy as np
+from dotenv import load_dotenv
+from etl.find_my_epc.AssetListEpcData import AssetListEpcData
+from backend.Funding import Funding
+from backend.app.utils import sap_to_epc
+from recommendations.recommendation_utils import estimate_external_wall_area
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+DOWNLOADS_DIR = "/Users/khalimconn-kowlessar/Downloads/"
+ACIS_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/"
+SOLID_WALL_MEASURES = ("internal_wall_insulation", "external_wall_insulation")
+BAND_KEYS = ["starting_half_band", "ending_half_band", "floor_area_band"]
+ABS_MATRIX_KEYS = ['Starting Band', 'Finishing Band', 'Floor Area Segment']
+
+abs_matrix = pd.read_csv(DOWNLOADS_DIR + "ECO4 Full Project Scores Matrix.csv")
+pps_matrix = pd.read_excel(DOWNLOADS_DIR + "ECO4 Partial Project Scores Matrix v5.xlsx", header=1)
+# Strip surrounding whitespace from the partial project matrix column names
+pps_matrix.columns = [c.strip() for c in pps_matrix.columns]
+
+asset_list = pd.read_excel(
+    ACIS_DIR + "Solid Wall Properties - Standardised_2.xlsx", sheet_name="Standardised Asset List"
+)
+asset_list = asset_list.rename(columns={"domna_address_1": "address", "domna_postcode": "postcode"})
+asset_list["address"] = asset_list["address"].astype(str)
+
+# Pull the find my EPC data and get the SAP points for solid wall
+epc_client = AssetListEpcData(asset_list=asset_list, epc_auth_token=EPC_AUTH_TOKEN)
+epc_client.get_data()
+epc_client.get_non_invasive_recommendations()
+
+# Keep the first wall insulation recommendation of each property; properties without one are skipped
+sap_point_rows = []
+for property_recs in epc_client.non_invasive_recommendations:
+    wall_recs = [
+        rec for rec in property_recs["recommendations"]
+        if any(measure in rec["type"] for measure in SOLID_WALL_MEASURES)
+    ]
+    if not wall_recs:
+        continue
+    sap_point_rows.append(
+        {
+            "address": property_recs["address"],
+            "postcode": property_recs["postcode"],
+            "sap_points": wall_recs[0]["sap_points"],
+        }
+    )
+
+solid_wall_sap_points = pd.DataFrame(sap_point_rows)
+# NB: despite the name this is the median; it fills in properties that have no matching recommendation
+avg_points = solid_wall_sap_points["sap_points"].median()
+
+asset_list = asset_list.merge(solid_wall_sap_points, how="left", on=["address", "postcode"])
+asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_points)
+asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
+asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(sap_to_epc)
+asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(Funding.get_sap_band)
+asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(Funding.get_sap_band)
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(Funding.get_floor_area_band)
+
+# A property whose EPC rating is unchanged by the works is put under GBIS, every other property under ECO4
+rating_unchanged = asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]
+asset_list["funding_scheme"] = np.where(rating_unchanged, "GBIS", "ECO4")
+
+# Merge on the ABS matrix
+asset_list = asset_list.merge(abs_matrix, how="left", left_on=BAND_KEYS, right_on=ABS_MATRIX_KEYS)
+asset_list = asset_list.drop(columns=ABS_MATRIX_KEYS)
+
+# store for backup
+# asset_list.to_csv(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties -
+# Standardised_2_with_funding.csv",
+# index=False
+# )
+
+# For GBIS, we use the PPS
+# Almost all properties are gas
+
+# Using IWI solid 1.7 -> 0.3 rates
+pps_matrix = pps_matrix[pps_matrix["Measure_Type"].isin(["IWI_solid_1.7_0.3"])]
+
+# Merge on
+partial_scores = pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
+    columns={
+        "Cost Savings": "partial_project_score",
+        "Starting Band": "starting_half_band",
+        "Total Floor Area Band": "floor_area_band",
+    }
+)
+# NOTE(review): if the filtered matrix holds several rows per (starting band, floor area band) pair, e.g. one per
+# heating source, this merge duplicates properties - confirm against the workbook
+asset_list = asset_list.merge(partial_scores, how="left", on=["starting_half_band", "floor_area_band"])
+# No partial project score is kept for properties whose starting band is Low_C or High_C
+starts_in_band_c = asset_list["starting_half_band"].isin(["Low_C", "High_C"])
+asset_list["partial_project_score"] = np.where(starts_in_band_c, None, asset_list["partial_project_score"])
+
+asset_list["funding_abs"] = np.where(
+    asset_list["funding_scheme"] == "GBIS", asset_list["partial_project_score"], asset_list["Cost Savings"]
+)
+
+
+def _external_wall_area(row):
+    """Estimate the external wall area of one asset list row, using a floor height of 2.5 when the EPC has none."""
+    floor_height = 2.5 if pd.isnull(row["epc_floor_height"]) else float(row["epc_floor_height"])
+    return estimate_external_wall_area(
+        num_floors=row["attribute_est_number_floors"],
+        floor_height=floor_height,
+        perimeter=row["attribute_est_perimter"],
+        built_form=row["epc_archetype"],
+    )
+
+
+asset_list["heat_loss_area"] = asset_list.apply(_external_wall_area, axis=1)
+
+filename = ACIS_DIR + "20250624 ACIS solid wall - standardised.xlsx"
+
+with pd.ExcelWriter(filename) as writer:
+    asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
diff --git a/etl/customers/blakeridge_mill/data.py b/etl/customers/blakeridge_mill/data.py
new file mode 100644
index 00000000..c9d7f9e6
--- /dev/null
+++ b/etl/customers/blakeridge_mill/data.py
@@ -0,0 +1,55 @@
+"""
+Get the EPC records for postcodes WF17 8RA and WF17 8RB, keep the newest certificate per UPRN, save them to Excel
+and print a few summary statistics.
+"""
+import os
+
+import pandas as pd
+from epc_api.client import EpcClient
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+postcodes = [
+    "WF17 8RA",
+    "WF17 8RB",
+]
+
+client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+data = []
+for postcode in postcodes:
+    resp = client.domestic.search(
+        params={"postcode": postcode, "address": None, "local-authority": None, "property-type": None,
+                "floor-area": None,
+                "energy-band": None, "from-month": None, "from-year": None, "to-month": None, "to-year": None,
+                'constituency': None},
+        size=1000
+    )
+    data.extend(resp["rows"])
+
+df = pd.DataFrame(data)
+# Get newest record by UPRN, inspection-date
+# NOTE(review): records with a blank uprn would all collapse into a single row here - confirm none are expected
+df["inspection-date"] = pd.to_datetime(df["inspection-date"])
+df = df.sort_values(by=["uprn", "inspection-date"], ascending=[True, False])
+df = df.drop_duplicates(subset=["uprn"], keep="first")
+
+df.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Blakeridge Mill/blakeridge_mill_epc_data.xlsx", index=False
+)
+
+# The summaries below exclude the tower address. They are printed because the value of a bare expression is
+# discarded when the file is run as a script; .copy() keeps the later column assignment off a slice of df.
+df = df[df["address"] != "The Tower Blakeridge Mill, Upper Blakeridge Lane"].copy()
+print(df["walls-description"].value_counts())
+print(df["roof-description"].value_counts())
+
+print(df["total-floor-area"].astype(float).mean())
+df["current-energy-efficiency"] = pd.to_numeric(df["current-energy-efficiency"], errors='coerce')
+
+print(df.groupby("transaction-type")["current-energy-efficiency"].mean())
+print(df["transaction-type"].value_counts())
+
+print(df[df["transaction-type"] == "rental"]["built-form"].value_counts())
diff --git a/etl/customers/bromford/solar_pv_cleanup.py b/etl/customers/bromford/solar_pv_cleanup.py
new file mode 100644
index 00000000..c2c541da
--- /dev/null
+++ b/etl/customers/bromford/solar_pv_cleanup.py
@@ -0,0 +1,289 @@
+import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+import numpy as np
+
+contact_list = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
+ "PV address list - second wave KLD - PP.csv"
+)
+contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number(
+ address=str(x["Address 1: Street 1"]).strip(),
+ postcode=str(x["Postal Code"]).strip(),
+), axis=1)
+
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - "
+ "Standardised (1).xlsx",
+ sheet_name="Standardised Asset List"
+)
+
+lookup = []
+missed = []
+for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)):
+
+ if x["Address 1: Street 1"] == '1 The Beck':
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": 40692,
+ }
+ )
+ continue
+
+ if x["Address 1: Street 1"] == '3 The Beck ':
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": 40693,
+ }
+ )
+ continue
+
+ if x["Address 1: Street 1"] == '2 Orchard Close ':
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": 7924,
+ }
+ )
+ continue
+
+ if x["Address 1: Street 1"] == '2 Orchard Close ':
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": 7924,
+ }
+ )
+ continue
+
+ if x["Address 1: Street 1"] == '3 Croxall Road':
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": 40650,
+ }
+ )
+ continue
+
+ if x["Address 1: Street 1"] == '4 Ward Road ':
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": 33175,
+ }
+ )
+ continue
+
+ df = asset_list[
+ asset_list["domna_full_address"].str.replace(",", "").str.contains(x["Address 1: Street 1"].strip()) &
+ asset_list["domna_postcode"].str.contains(x["Postal Code"].strip())
+ ]
+
+    if df.shape[0] != 1:  # fallback: exact address match; `==` is parenthesised because `&` binds tighter
+        df = asset_list[
+            (asset_list["domna_full_address"].str.replace(",", "") == x["Address 1: Street 1"].strip()) &
+            asset_list["domna_postcode"].str.contains(x["Postal Code"].strip())
+        ]
+
+ if df.shape[0] != 1:
+ df = asset_list[
+ (asset_list["domna_address_1"].astype(str) == str(x["house_no"])) &
+ (asset_list["domna_postcode"].str.contains(x["Postal Code"].strip()) == True)
+ ]
+
+ if df.shape[0] != 1:
+ missed.append(x["UPRN"])
+ continue
+
+ lookup.append(
+ {
+ "UPRN": x["UPRN"],
+ "landlord_property_id": df["landlord_property_id"].values[0],
+ }
+ )
+
+lookup = pd.DataFrame(lookup)
+
+contact_list = contact_list.merge(lookup, how="left", on="UPRN")
+# Store
+contact_list.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
+ "PV address list - second wave KLD - PP with landlord_property_id.csv",
+ index=False
+)
+
+# I manually completed the lookup for the missed ones. We now read it back in and pull in the properties for the
+# standardised asset list
+contacts_complete = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
+ "PV address list - second wave KLD - PP with landlord_property_id.csv"
+)
+
+new_data = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet "
+ "Solar PV installs.xlsx",
+ sheet_name="Sheet1"
+)
+
+contact_list = contact_list.merge(
+ new_data,
+ how="left",
+ left_on="UPRN",
+ right_on="CE UPRN"
+)
+route = asset_list[
+ asset_list["landlord_property_id"].isin(contact_list["Legacy UPRN"].astype("Int64").astype(str))
+].copy()
+
+# Add the new heating data
+contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str)
+route2 = contact_list.merge(
+ route,
+ how="left",
+ right_on="landlord_property_id",
+ left_on="Legacy UPRN"
+)
+
+# Contacts with no matching asset-list row. "Legacy UPRN" was cast to str above, so compare against str ids
+missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"].astype(str))]
+
+# Store both the route and missed
+route2.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv",
+ index=False
+)
+
+# Add on phone number
+contact_details_filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme "
+ "Hubspot Upload/Hubspot/Bromford - Solar PV address list - second wave KLD - PP with "
+ "landlord_property_id.xlsx")
+
+contacts_filenames = [
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
+ "details/FAO Paul Contact Details-Table 1.csv",
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
+ "details/Green Contact Details-Table 1.csv",
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
+ "details/Main Contact Details-Table 1.csv",
+]
+
+merge_to = pd.read_excel(contact_details_filepath)
+
+lookup = []
+for fn in contacts_filenames:
+ df = pd.read_csv(fn, encoding="utf-8-sig")
+ # Merge on phone
+ details = df[
+ df["Property Reference Number (Main Address) (Property)"].isin(merge_to["UPRN"].astype(str))
+ ][[
+ "Property Reference Number (Main Address) (Property)", "Landline", "Mobile Phone", "Email Address",
+ "First Name", "Last Name"
+ ]]
+
+ lookup.append(details)
+
+lookup = pd.concat(lookup)
+
+# Drop entries where landline, mobile and email are all NaN
+lookup = lookup.dropna(subset=["Landline", "Mobile Phone", "Email Address"], how="all")
+lookup = lookup.drop_duplicates(["Landline", "Mobile Phone", "Email Address"])
+# Sort so email is first, then landline, then mobile
+lookup = lookup.sort_values(
+ ["Property Reference Number (Main Address) (Property)", "Email Address", "Landline", "Mobile Phone"],
+ ascending=[True, True, True, True]
+)
+
+# Store
+lookup.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/contact "
+ "details.csv",
+ index=False
+)
+
+lookup2 = []
+for _, x in lookup.groupby("Property Reference Number (Main Address) (Property)"):
+
+    # If any entries have an email, we take those
+ if x["Email Address"].notna().any():
+ x = x[x["Email Address"].notna()]
+ # We then take the entry with a phone number
+ if x["Landline"].notna().any() or x["Mobile Phone"].notna().any():
+ x = x[x["Landline"].notna() | x["Mobile Phone"].notna()]
+
+ # Take the first entry
+ x = x.iloc[0]
+ lookup2.append(x)
+
+lookup2 = pd.DataFrame(lookup2)
+
+import pandas as pd
+
+# Contact columns used by the prioritisation loop below
+columns = ['Property Reference Number (Main Address) (Property)', 'Landline', 'Mobile Phone', 'Email Address']
+
+# NOTE: the pasted example reset `lookup` to an empty placeholder DataFrame at this point, which discarded
+# the contact details assembled above and left the loop below (and the later de-duplication) with nothing
+# to process. The real `lookup` frame is now used as-is.
+
+# Grouping and transforming
+results = []
+
+for prop_id, group in lookup.groupby("Property Reference Number (Main Address) (Property)"):
+    # Keep rows that have an email AND at least one phone number. Copy the selection so the score
+    # column added below is written to an independent frame rather than a slice of `group`
+    filtered = group[
+        group["Email Address"].notna() &
+        (group["Landline"].notna() | group["Mobile Phone"].notna())
+    ].copy()
+
+    if filtered.empty:
+        continue
+
+    # Sort by presence of phone numbers (prioritize those with both)
+    filtered["contact_score"] = (
+        filtered["Landline"].notna().astype(int) +
+        filtered["Mobile Phone"].notna().astype(int)
+    )
+    filtered = filtered.sort_values("contact_score", ascending=False)
+
+    # The best-scoring row is the primary contact
+    primary = filtered.iloc[0]
+    # Prefer mobile, else landline. NaN is truthy, so `mobile or landline` would have returned NaN
+    phone_col = "Mobile Phone" if pd.notna(primary["Mobile Phone"]) else "Landline"
+
+    # The secondary contact is the best remaining row whose phone differs from the primary's
+    candidates = filtered[filtered[phone_col] != primary[phone_col]]
+    secondary = candidates.iloc[0] if len(candidates) > 0 else None
+
+    secondary_phone = None
+    if secondary is not None:
+        mobile = secondary["Mobile Phone"]
+        secondary_phone = mobile if pd.notna(mobile) else secondary["Landline"]
+
+    results.append({
+        "Property ID": prop_id,
+        "Primary Email": primary["Email Address"],
+        "Primary Phone": primary[phone_col],
+        "Secondary Email": secondary["Email Address"] if secondary is not None else None,
+        "Secondary Phone": secondary_phone,
+    })
+
+final_df = pd.DataFrame(results)
+
+# NOTE(review): `ace_tools` looks like the ChatGPT-sandbox display helper and is presumably not installable
+# here (confirm); preview the cleaned contact lookup with plain pandas instead.
+print(final_df.head())
+
+# We set up primary and secondary phone numbers. We use mobile as the primary
+
+
+# We have duplicates; per ID, we prioritise entries that have an email
+lookup2 = lookup.sort_values("Property Reference Number (Main Address) (Property)").drop_duplicates(
+ "Property Reference Number (Main Address) (Property)", keep="last"
+)
+
+# TODO: Get into the standardised asset list format
+# TODO: Add the deal postcode to Hubspot
+# TODO: Upload the deal postcode
diff --git a/etl/customers/ealing/fixing houses asset list.py b/etl/customers/ealing/fixing houses asset list.py
new file mode 100644
index 00000000..4a39428a
--- /dev/null
+++ b/etl/customers/ealing/fixing houses asset list.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+houses_list = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(UNCHECKED).csv"
+)
+
+features = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(IGNORE - FULL MAIN).csv"
+)
+features = features.drop(
+ columns=[
+ 'Archetype', 'Construction', 'Insulated', 'Material',
+ 'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES',
+ 'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'Surveyors Name',
+ 'Unnamed: 30', 'Unnamed: 31'
+ ]
+)
+
+demolitions = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing - Demolished or due to be.xlsx",
+ sheet_name="Demolished or due to be"
+)
+
+inspections_data = houses_list[
+ [
+ "Property ref", "Postcode", 'Archetype', 'Construction', 'Insulated', 'Material',
+ 'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES',
+ 'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'YET TO BE SURVEYED'
+ ]
+].rename(columns={"YET TO BE SURVEYED": "Surveyors Name"})
+
+asset_list = features.drop(
+ columns=[
+ 'Archetype', 'Construction', 'Insulated', 'Material', 'CIGA Check Required',
+ 'PV, ACCESS ISSUE, SEE NOTES', 'OFF GAS - ROOF ORIENTATION',
+ 'Any further surveyor notes', 'Surveyors Name', "Postcode"
+ ]
+).merge(
+ inspections_data,
+ how="inner",
+ on="Property ref",
+)
+
+asset_list.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing_rechecked_cleaned_05042025.csv",
+ index=False)
diff --git a/etl/customers/ealing/prepare_for_hubspot.py b/etl/customers/ealing/prepare_for_hubspot.py
new file mode 100644
index 00000000..8cffda57
--- /dev/null
+++ b/etl/customers/ealing/prepare_for_hubspot.py
@@ -0,0 +1,75 @@
+import numpy as np
+import pandas as pd
+from asset_list.hubspot.config import HubspotProcessStatus
+
+project_data = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/Ealing Flats Completion Tracker JW "
+ "170625.xlsx",
+ sheet_name="All_Flats"
+)
+
+project_data["hubspot_status"] = None
+project_data["hubspot_status"] = np.where(
+ (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2023"),
+ HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
+ project_data["hubspot_status"]
+)
+project_data["hubspot_status"] = np.where(
+ (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2019"),
+ "SURVEYED UNDER 2019 - NEEDS RE-SURVEY",
+ project_data["hubspot_status"]
+)
+project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str)
+
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx",
+ sheet_name="Standardised Asset List"
+)
+asset_list["landlord_property_id"] = asset_list["landlord_property_id"].astype(str)
+asset_list["incorrect_landlord_property_id"] = asset_list["incorrect_landlord_property_id"].astype(str)
+project_data["Property ref"] = project_data["Property ref"].astype(str)
+
+# We need to update the status of properties that have already been surveyed
+asset_list2 = asset_list.merge(
+ project_data[["Property ref", "hubspot_status", "project_code"]],
+ how="left",
+ right_on="Property ref",
+ left_on="incorrect_landlord_property_id",
+ suffixes=("", "_project")
+)
+asset_list2["hubspot_status"] = np.where(
+ ~pd.isna(asset_list2["hubspot_status_project"]),
+ asset_list2["hubspot_status_project"],
+ asset_list2["hubspot_status"]
+)
+asset_list2["project_code"] = np.where(
+ ~pd.isna(asset_list2["project_code"]),
+ asset_list2["project_code"],
+ asset_list2["landlord_property_id"]
+)
+
+asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"])
+asset_list2["cavity_reason"] = np.where(
+ pd.isnull(asset_list2["cavity_reason"]),
+ "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68",
+ asset_list2["cavity_reason"]
+)
+asset_list2["solar_reason"] = None
+
+# Read in block analysis and geographical areas from standardised asset list
+block_analysis_df = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx",
+ sheet_name="Block Analysis"
+)
+geographical_areas = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx",
+ sheet_name="Geographical Areas"
+)
+
+# Update the new standardised asset list
+filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
+ "programme.xlsx")
+with pd.ExcelWriter(filename) as writer:
+ asset_list2.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+ block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
+ geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
diff --git a/etl/customers/mhs/new_programme.py b/etl/customers/mhs/new_programme.py
new file mode 100644
index 00000000..6f1caafe
--- /dev/null
+++ b/etl/customers/mhs/new_programme.py
@@ -0,0 +1,116 @@
+#
+import pandas as pd
+
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
+ "programme.xlsx",
+ sheet_name="Standardised Asset List"
+)
+
+new_cavity_programme = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
+ "programme.xlsx",
+ sheet_name="New Cavity Programme"
+)
+
+new_cavity_pilot = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
+ "programme.xlsx",
+ sheet_name="Empty Cavity Pilot"
+)
+
+new_solar_programme = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
+ "programme.xlsx",
+ sheet_name="New Solar Programme"
+)
+
+in_fill_properties_houses = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 ("
+ "1).xlsx",
+ sheet_name="Houses and Bungalows"
+)
+in_fill_properties_flats = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 ("
+ "1).xlsx",
+ sheet_name="Flats and Maistonettes"
+)
+# Q1) What are these properties? Do we have them on our list already?
+# All of the houses are already in the asset list
+in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin(
+ asset_list["landlord_property_id"].values
+)
+# All of the flats are already in the asset list
+in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin(
+ asset_list["landlord_property_id"].values
+)
+
+# Q2) Which properties are excluded from the new programme?
+in_fill_properties = pd.concat(
+ [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False
+)
+
+# Merge on the data
+in_fill_properties = in_fill_properties.merge(
+ asset_list,
+ left_on="UPRN",
+ right_on="landlord_property_id",
+ how="left"
+)
+# How many properties are in the new programme?
+
+in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin(
+ new_cavity_programme["landlord_property_id"].values
+)
+in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin(
+ new_solar_programme["landlord_property_id"].values
+)
+in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin(
+ new_cavity_pilot["landlord_property_id"].values
+)
+not_in_new_programme = in_fill_properties[
+ (~in_fill_properties["in_new_cavity_programme"] & ~in_fill_properties["in_new_solar_programme"] & ~
+ in_fill_properties["in_new_cavity_pilot"])
+].copy()
+
+# Why?
+not_in_new_programme["cavity_reason"].value_counts()
+not_in_new_programme["solar_reason"].value_counts()
+
+not_identified_for_anything = not_in_new_programme[
+ pd.isnull(not_in_new_programme["cavity_reason"]) &
+ pd.isnull(not_in_new_programme["solar_reason"])
+ ]
+
+# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding
+# the extraction
+not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin(
+ [
+ "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75",
+ "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68",
+ "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more",
+ "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less",
+ "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more",
+ "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less",
+ "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less",
+ "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more",
+ ]
+)
+
+not_in_new_programme["excluded"] = not_in_new_programme["landlord_property_id"].isin(
+    not_identified_for_anything["landlord_property_id"].values  # True/False for every row, not True/NaN
+)
+
+not_in_new_programme[
+ not_in_new_programme["funded_extractions"]
+].to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv",
+ index=False
+)
+
+not_in_new_programme[
+ not_in_new_programme["excluded"] == True
+ ].to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv",
+ index=False
+)
diff --git a/etl/customers/ncha/portfolio.py b/etl/customers/ncha/portfolio.py
new file mode 100644
index 00000000..f47c87c8
--- /dev/null
+++ b/etl/customers/ncha/portfolio.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+cavity = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx",
+ sheet_name="Cavity Properties (for review)",
+)
+solar = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx",
+ sheet_name="Solar Properties",
+)
+
+cavity_al = cavity[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
+ columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
+)
diff --git a/etl/customers/plus dane/prepare_asset_list.py b/etl/customers/plus dane/prepare_asset_list.py
new file mode 100644
index 00000000..430c7b5a
--- /dev/null
+++ b/etl/customers/plus dane/prepare_asset_list.py
@@ -0,0 +1,48 @@
+"""
+July 2025, this script prepares the asset list for Plus Dane
+"""
+import pandas as pd
+
+oldest_asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/PLUS DANE Asset List.xlsx"
+)
+solar_asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane - potential "
+ "PV List 04.03.2025.xlsx"
+)
+newest_asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Sava Intelligent Energy "
+ "- Property List - March 2025.xlsx"
+)
+
+old_missed = oldest_asset_list[~oldest_asset_list["UPRN"].isin(newest_asset_list["UPRN"])]
+solar_missed = solar_asset_list[~solar_asset_list["UPRN"].isin(newest_asset_list["UPRN"])] # Empty
+
+# Build new asset list
+# NEWEST
+# 'UPRN', 'Address', 'Postcode', 'Town', 'EPC SAP Band', 'SAP Rating',
+# 'CO₂ Emissions', 'EPC EI Band', 'Data Quality Indicator',
+# 'Results Calculated', 'Property Age', 'Property Type', 'Built Form',
+# 'Wall Construction', 'Wall Insulation', 'Roof Construction',
+# 'Joist Insulation', 'Space Heating System', 'Space Heating Fuel'
+#
+# SOLAR
+
+df = newest_asset_list.merge(
+ solar_asset_list, how="left", on="UPRN", suffixes=("", "_solar"),
+).merge(
+ oldest_asset_list, how="left", on="UPRN", suffixes=("", "_old")
+)
+df["asset_list_versiion"] = "July 2025"
+old_missed["asset_list_versiion"] = "Historic"
+
+# Append on the old missed?
+df = pd.concat(
+ [df, old_missed], ignore_index=True, sort=False
+)
+# Store excel
+df.to_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane Asset List "
+ "July 2025.xlsx",
+ index=False,
+)
diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py
index a8805a71..df4a16fe 100644
--- a/etl/customers/remote_assessments/app.py
+++ b/etl/customers/remote_assessments/app.py
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
-PORTFOLIO_ID = 141
+PORTFOLIO_ID = 212
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
@@ -17,25 +17,15 @@ def app():
:return:
"""
- asset_list = [
- {
- "address": "196 Merrow Street",
- "postcode": "SE17 2NP",
- "uprn": 200003423454,
- "patch": True
- },
- {
- "address": "65 Liverpool Grove",
- "postcode": "SE17 2HP",
- "uprn": 200003423194
- },
- {
- "address": "2 Brettell Street",
- "postcode": "SE17 2NZ",
- "uprn": 200003423607
- },
- ]
- asset_list = pd.DataFrame(asset_list)
+ asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx",
+ sheet_name="Solar Properties",
+ )
+ asset_list = asset_list[~asset_list["estimated"]]
+ asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
+ asset_list = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
+ columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
+ )
# Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
@@ -98,14 +88,15 @@ def app():
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
- "goal_value": "C",
+ "goal_value": "A",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": patches_filename,
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
- "valuation_file_path": valuation_filename,
+ "valuation_file_path": "",
"scenario_name": "Full package remote assessment",
"multi_plan": True,
"budget": None,
+ "inclusions": ["cavity_wall_insulation", "ventilation"]
}
print(body)
diff --git a/etl/customers/thrive/Make Insepctions route.py b/etl/customers/thrive/Make Insepctions route.py
new file mode 100644
index 00000000..ec4f620b
--- /dev/null
+++ b/etl/customers/thrive/Make Insepctions route.py
@@ -0,0 +1,40 @@
+"""
+This script will pull in properties, in neighbouring areas, that have been flagged for CWI
+"""
+import pandas as pd
+
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
+ "reconciled.xlsx",
+ sheet_name="Standardised Asset List"
+)
+
+cavity_areas = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
+ "reconciled.xlsx",
+ sheet_name="Cavity Areas"
+)
+
+existing_inspections_sheet = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
+ "reconciled.xlsx",
+ sheet_name="July 2025 Inspections"
+)
+
+empties = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
+ "reconciled.xlsx",
+ sheet_name="Cavity properties - for review"
+)
+
+cavity_inspections = asset_list[
+ asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values)
+]
+cavity_inspections = cavity_inspections[
+ ~cavity_inspections["landlord_property_id"].isin(empties["landlord_property_id"].values)
+]
+
+cavity_inspections.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv",
+ index=False
+)
diff --git a/etl/find_my_epc/AssetListEpcData.py b/etl/find_my_epc/AssetListEpcData.py
index f085c8fb..2ff9a3e0 100644
--- a/etl/find_my_epc/AssetListEpcData.py
+++ b/etl/find_my_epc/AssetListEpcData.py
@@ -1,3 +1,4 @@
+import random
import time
import pandas as pd
from tqdm import tqdm
@@ -27,6 +28,7 @@ class AssetListEpcData:
self.extracted_data = None
self.non_invasive_recommendations = None
self.patches = None
+ self.epc_data = None
@staticmethod
def check_asset_list(asset_list):
@@ -49,7 +51,7 @@ class AssetListEpcData:
"uprn": r.get("uprn"),
"address": r["address"],
"postcode": r["postcode"],
- "recommendations": r["recommendations"]
+ "recommendations": r.get("recommendations")
} for r in self.extracted_data
]
@@ -74,7 +76,9 @@ class AssetListEpcData:
# Pull the additional data
extracted_data = []
+ epc_data = []
for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)):
+
add1 = home["address"]
pc = home["postcode"]
# Retrieve the EPC data
@@ -92,9 +96,6 @@ class AssetListEpcData:
if epc_searcher.newest_epc is None:
continue
- if not pd.isnull(home.get("patch")):
- epc_searcher.newest_epc["address1"] = add1
-
# Attempt both methods:
try:
find_epc_searcher = RetrieveFindMyEpc(
@@ -104,16 +105,37 @@ class AssetListEpcData:
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error(f"Error retrieving find my epc data: {e}")
- find_epc_searcher = RetrieveFindMyEpc(
- address=epc_searcher.newest_epc["address1"],
- postcode=epc_searcher.newest_epc["postcode"]
- )
- find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
- time.sleep(0.5)
+ if not pd.isnull(home.get("patch")):
+ epc_searcher.newest_epc["address1"] = add1
+
+ try:
+ find_epc_searcher = RetrieveFindMyEpc(
+ address=epc_searcher.newest_epc["address1"],
+ postcode=epc_searcher.newest_epc["postcode"]
+ )
+ find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+ except Exception as e:
+                    logger.error(f"Error retrieving find my epc data with alternative address format: {e}")
+ find_epc_data = {
+ "current_epc_rating": epc_searcher.newest_epc["current-energy-rating"],
+ "current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"],
+ "potential_epc_rating": None,
+ "potential_epc_efficiency": None,
+ "epc_data": {}
+ }
+
+ # Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit
+ time.sleep(random.sample(range(50, 100), 1)[0] / 100)
+
+ # Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit
+ if len(extracted_data) % 50 == 0 and len(extracted_data) > 0:
+ logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
+ time.sleep(10)
+
# We need uprn
to_append = {
- "uprn": home.get("uprn"),
+ "uprn": home.get("uprn", epc_searcher.newest_epc["uprn"]),
"address": home["address"],
"postcode": home["postcode"],
**find_epc_data,
@@ -128,6 +150,8 @@ class AssetListEpcData:
}
extracted_data.append(to_append)
+ epc_data.append(epc_searcher.newest_epc)
self.extracted_data = extracted_data
+ self.epc_data = epc_data
logger.info("Data Extrction complete")
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index fad0c78e..50955377 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -1,3 +1,4 @@
+import time
import re
import pandas as pd
import requests
@@ -55,9 +56,11 @@ class RetrieveFindMyEpc:
results = {}
# 1. Total floor area
- results['total-floor-area'] = int(self.get_text(
+        # We have some instances of very old EPCs where the total floor area is not available
+ tfa = self.get_text(
soup.find("dt", string="Total floor area").find_next_sibling("dd")
- ).split(" ")[0])
+ ).split(" ")[0]
+ results['total-floor-area'] = int(tfa) if tfa != "Not" else None
# Table with features
rows = soup.select("table.govuk-table tbody tr")
@@ -125,9 +128,156 @@ class RetrieveFindMyEpc:
return results
- def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
+ def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
+
+ ratings = soup.find('desc', {'id': 'svg-desc'}).text
+ current_rating = ratings.split(".")[0]
+ potential_rating = ratings.split(".")[1]
+ current_sap = int(current_rating.split(' ')[-1])
+
+ # Retrieve the energy consumption
+ bills = soup.find('div', {'id': 'bills-affected'})
+ bills_list = bills.find_all('li')
+ if not bills_list:
+            # If this is the case, it's usually because the EPC was very old. Early EPCs did not have this information
+ heating_text = None
+ hot_water_text = None
+ else:
+ heating_text = bills_list[0].text
+ hot_water_text = bills_list[1].text
+
+ # Retrieve the recommendations and SAP points
+ recommendations = []
+ recommendations_div = soup.find('div', class_='epb-recommended-improvements')
+ if recommendations_div:
+ # Find all h3 headers for each step and extract their related information
+ step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
+ previous_sap_score = current_sap
+ previous_epc = current_rating.split(' ')[-6]
+ for step_num, step_header in enumerate(step_headers, start=1):
+ # Extract the step title (the measure)
+ measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")
+
+ # Find the div containing the potential rating within the same section
+ potential_rating_div = step_header.find_next(
+ 'div', class_='epb-recommended-improvements__potential-rating'
+ )
+
+ # Check if the potential rating div is found
+ if potential_rating_div:
+ # Extract the rating text within the SVG text element
+ extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
+ if extracted_rating_text is not None:
+ rating_text = extracted_rating_text.text.strip()
+ else:
+ rating_text = " ".join([str(previous_sap_score), previous_epc])
+ # Parse the rating text to separate the numeric rating and EPC letter
+ new_rating = int(rating_text.split()[0])
+ new_epc = rating_text.split()[1]
+
+ # Append the information as a dictionary to the recommendations list
+ recommendations.append({
+ "step": step_num,
+ "measure": measure_title,
+ "new_rating": new_rating,
+ "new_epc": new_epc,
+ "sap_points": new_rating - previous_sap_score
+ })
+ previous_sap_score = new_rating
+ previous_epc = new_epc
+
+        # Search for the assessment information
+ assessment_information = soup.find('div', {'id': 'information'})
+ # Parse this information
+ rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
+ # Create a dictionary to hold the parsed information
+ assessment_data = {}
+ for row in rows:
+ key = row.find('dt').text.strip()
+ if key == "Type of assessment":
+                # We don't reliably extract this
+ continue
+ value_tag = row.find('dd')
+
+ # Check if value contains a link (email)
+ if value_tag.find('a'):
+ value = value_tag.find('a').text.strip()
+ elif value_tag.find('summary'):
+ value = value_tag.find('span').text.strip()
+ else:
+ value = value_tag.text.strip()
+
+            # These are keys that we have for both the surveyor and the accreditation scheme. Firstly, we'll
+ # get the surveyor's name and email so we make that information clear
+ if key in ["Telephone", "Email"]:
+ if "Assessor's " + key not in assessment_data:
+ assessment_data["Assessor's " + key] = value
+ else:
+ assessment_data["Accreditation Scheme's " + key] = value
+ continue
+
+ assessment_data[key] = value
+
+ expected_keys = [
+ 'Assessor’s name',
+ "Assessor's Telephone",
+ "Assessor's Email",
+ 'Assessor’s ID',
+ 'Accreditation scheme',
+ 'Assessor’s declaration',
+ "Accreditation Scheme's Telephone",
+ "Accreditation Scheme's Email",
+ 'Date of assessment',
+ 'Date of certificate'
+ ]
+ # Check we have all the expected keys
+ for key in expected_keys:
+ if key not in assessment_data:
+ raise ValueError(f"Missing key: {key}")
+
+ # The wall types of the property
+ property_features_table = soup.find("tbody", class_="govuk-table__body")
+ property_features_table = property_features_table.find_all("tr")
+
+ # Extract wall types
+ self.walls = []
+ for row in property_features_table:
+ cells = row.find_all("td")
+ if row.find("th").text.strip() == "Wall":
+ self.walls.append(cells[0].text.strip())
+
+ # Finally, we format the recommendations
+ recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)
+
+ # 4) Low and zero carbon energy sources
+ low_carbon_energy_sources = self.extract_low_carbon_sources(soup)
+
+ # 5) Pull out the EPC data
+ epc_data = self.extract_epc_data(soup)
+
+ resulting_data = {
+ 'epc_certificate': epc_certificate,
+ 'current_epc_rating': current_rating.split(' ')[-6],
+ 'current_epc_efficiency': current_sap,
+ 'potential_epc_rating': potential_rating.split(' ')[-6],
+ "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
+ "heating_text": heating_text,
+ "hot_water_text": hot_water_text,
+ "recommendations": recommendations,
+ "epc_data": epc_data,
+ **assessment_data,
+ **low_carbon_energy_sources,
+ }
+
+ return resulting_data
+
+ def retrieve_all_find_my_epc_data(self, sap_2012_date=None):
+
"""
- For a post code and address, we pull out all the required data from the find my epc website
+ This is a quick function to retrieve all the data from the find my epc website for a given postcode and address.
+ Using this to fulfill a short term need to retrieve all history for a property
+ :param sap_2012_date:
+ :return:
"""
postcode_input = self.postcode.replace(" ", "+")
@@ -182,6 +332,98 @@ class RetrieveFindMyEpc:
address_response = requests.get(chosen_epc, headers=self.HEADERS)
address_res = BeautifulSoup(address_response.text, features="html.parser")
+ # We check the section on "Other certificates for this property" and get the url
+ # Find the section for other certificates
+ other_cert_section = address_res.find('div', id='other_certificates_and_reports')
+
+ # Extract all certificate number rows (anchor tags within a govuk-summary-list)
+ other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a')
+
+ other_certificates = []
+ for link in other_cert_links:
+ cert_number = link.text.strip()
+ cert_url = link['href'].strip()
+ other_certificates.append({
+ "certificate_number": cert_number,
+ "certificate_url": f"https://find-energy-certificate.service.gov.uk{cert_url}"
+ })
+
+ # Always include the currently selected EPC first
+ soup_list = [address_res]
+
+ # Add additional historic certificates
+ for link in other_cert_links:
+ cert_url = f"https://find-energy-certificate.service.gov.uk{link['href'].strip()}"
+ response = requests.get(cert_url, headers=self.HEADERS)
+ time.sleep(0.3)
+ soup_list.append(BeautifulSoup(response.text, features="html.parser"))
+
+ all_find_my_epc_data = []
+ for soup in soup_list:
+ # The first soup is the currently selected EPC, followed by the historic certificates
+ all_find_my_epc_data.append(self._extract_epc_from_soup(soup, epc_certificate, sap_2012_date))
+
+ return all_find_my_epc_data
+
+ def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
+ """
+ For a post code and address, we pull out all the required data from the find my epc website
+ """
+
+ postcode_input = self.postcode.replace(" ", "+")
+ postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
+ postcode_response = requests.get(postcode_search, headers=self.HEADERS)
+
+ postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
+ rows = postcode_res.find_all('tr', class_='govuk-table__row')
+
+ extracted_table = []
+ for row in rows:
+ # Extract the address and URL
+ address_tag = row.find('a', class_='govuk-link')
+ if address_tag is None:
+ continue
+ extracted_address = None
+ extracted_address_url = None
+ if address_tag:
+ extracted_address = address_tag.text.strip()
+ extracted_address_url = address_tag['href']
+
+ extracted_address_cleaned = (
+ extracted_address.replace(",", "").replace(" ", "").lower()
+ )
+ if not extracted_address_cleaned.startswith(self.address_cleaned):
+ continue
+
+ # If the address is a match, we can extract the data
+
+ # Extract the expiry date
+ expiry_date_tag = row.find('td', class_='govuk-table__cell date')
+ expiry_date = None
+ if expiry_date_tag is not None:
+ expiry_date = expiry_date_tag.parent.find('span').text.strip()
+
+ extracted_table.append(
+ {
+ "extracted_address": extracted_address,
+ "extracted_address_url": extracted_address_url,
+ "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'),
+ }
+ )
+
+ if not extracted_table:
+ raise ValueError("No EPC found")
+
+ if len(extracted_table) > 1:
+ # We take the one with the most recent expiry date
+ extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True)
+
+ chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
+ epc_certificate = chosen_epc.split('/')[-1]
+
+ address_response = requests.get(chosen_epc, headers=self.HEADERS)
+ address_res = BeautifulSoup(address_response.text, features="html.parser")
+
# Key data we want to retrieve:
# 1) Rating
# 2) Bills estimates
@@ -195,9 +437,6 @@ class RetrieveFindMyEpc:
potential_rating = ratings.split(".")[1]
current_sap = int(current_rating.split(' ')[-1])
- # Floor area
- address_res.find()
-
# Retrieve the energy consumption
bills = address_res.find('div', {'id': 'bills-affected'})
bills_list = bills.find_all('li')
@@ -432,6 +671,13 @@ class RetrieveFindMyEpc:
"Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
"Heating controls (programmer and thermostatic radiator valves)": [
"roomstat_programmer_trvs", "time_temperature_zone_control"
+ ],
+ 'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
+ "roomstat_programmer_trvs", "time_temperature_zone_control"
+ ],
+ "Internal wall insulation": ["internal_wall_insulation"],
+ "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [
+ "high_heat_retention_storage_heater"
]
}
@@ -466,8 +712,13 @@ class RetrieveFindMyEpc:
find_epc_data = searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error(f"Error retrieving find my epc data: {e}")
+ if epc["address1"] == epc["address"]:
+ # There's no benefit in retrying with the same address, so we take the part before the first comma
+ address1 = epc["address"].split(",")[0]
+ else:
+ address1 = epc["address1"]
# We attempt with the backup add
- searcher = cls(address=epc["address1"], postcode=epc["postcode"])
+ searcher = cls(address=address1, postcode=epc["postcode"])
find_epc_data = searcher.retrieve_newest_find_my_epc_data()
non_invasive_recommendations = {
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 96eb5d0e..0ef37add 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -194,7 +194,7 @@ class Costs:
IWI_CONTINGENCY = 0.2
# For air source heat pumps, we inflate the assume cost by quite a bit to account for design and installation
- ASHP_CONTINGENCY = 0.35
+ ASHP_CONTINGENCY = 0.25
# Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.2
# When there is less uncertainty, a lower contingency rate is used
@@ -871,10 +871,10 @@ class Costs:
if needs_cylinder:
# 1000 is the cost of a new hot water cylinder
- total_cost = 1200 * number_heated_rooms + 1000
+ total_cost = 1300 * number_heated_rooms + 1000
else:
# 500 is the cost of a dual immersion heater - a rough estimate
- total_cost = 1200 * number_heated_rooms + 500
+ total_cost = 1300 * number_heated_rooms + 500
subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
vat = total_cost - subtotal_before_vat
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 18e1110b..9d1a094e 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -57,6 +57,31 @@ class HeatingRecommender:
},
# These are the heating types we need to produce a dual heating recommendation
"dual": None
+ },
+ 'Electric underfloor heating, electric storage heaters': {
+ # For this, we would recommend a heat pump
+ "dual": None
+ },
+ "Room heaters, electric, boiler and radiators, mains gas": {
+ "hhr": {
+ "mainheating_description": "Electric storage heaters, radiators",
+ "recommendation_description": "Install high heat retention electric storage heaters.",
+ "controls_prefix": ""
+ },
+ "boiler": {
+ "mainheating_description": "Boiler and radiators, mains gas",
+ "recommendation_description": "Upgrade to a new condensing boiler.",
+ "controls_suffix": ""
+ },
+ "dual": None
+ },
+ "Room heaters, electric, electric storage heaters": {
+ "hhr": {
+ "mainheating_description": "Electric storage heaters, radiators",
+ "recommendation_description": "Install high heat retention electric storage heaters.",
+ "controls_prefix": ""
+ },
+ "dual": None
}
}
@@ -109,6 +134,10 @@ class HeatingRecommender:
hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters
+ hhr_suitable = hhr_suitable and (
+ "underfloor heating" not in self.property.main_heating["clean_description"]
+ )
+
return (
hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and
("high_heat_retention_storage_heater" in measures)
@@ -165,7 +194,8 @@ class HeatingRecommender:
) and
(not ashp_only_heating_recommendation) and
("boiler_upgrade" in measures) and
- (not self.has_ashp)
+ (not self.has_ashp) and
+ (not self.property.main_heating["has_warm_air"])
)
return is_valid, has_gas_boiler
@@ -487,17 +517,30 @@ class HeatingRecommender:
]
# This is a map from the heating controls description to the description of the air source heat pump set up
- ashp_descriptions = {
- "Time and temperature zone control": (
- f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
- "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 or "
- "24 hour tariff"
- ),
- "Programmer, TRVs and bypass": (
- f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure you "
- "have an 18 or 24 hour tariff"
- ),
- }
+ if ashp_size is None:
+ ashp_descriptions = {
+ "Time and temperature zone control": (
+ f"Install two cascaded air source heat pumps, and upgrade heating controls to Smart Thermostats, "
+ "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 "
+ "or "
+ "24 hour tariff"
+ )
+ }
+ else:
+
+ ashp_descriptions = {
+ "Time and temperature zone control": (
+ f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
+ "room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 "
+ "or "
+ "24 hour tariff"
+ ),
+ "Programmer, TRVs and bypass": (
+ f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure "
+ f"you "
+ "have an 18 or 24 hour tariff"
+ ),
+ }
new_heating_description = "Air source heat pump, radiators, electric"
new_hot_water_description = "From main system"
@@ -924,6 +967,7 @@ class HeatingRecommender:
return recommendations
self.heating_recommendations.extend(recommendations)
+ return None
@staticmethod
def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 0e73cffe..462d43aa 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -679,7 +679,7 @@ class Recommendations:
# Handle the case of community schemes
if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
- if main_fuel_description == "mains gas (community)":
+ if main_fuel_description in ["mains gas (community)", "UNKNOWN"]:
return {
"heating_fuel_type": "Natural Gas (Community Scheme)",
"hotwater_fuel_type": "Natural Gas (Community Scheme)",
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index fa8b831c..31ac2433 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -191,11 +191,22 @@ class RoofRecommendations:
non_invasive_recommendations = self.property.non_invasive_recommendations
+ # We check a specific condition - which will imply loft insulation isn't appropriate but room in roof
+ # insulation is
+ # 1) We have an uninsulated loft (assumed)
+ # 2) We have a non-intrusive recommendation for room in roof insulation
+
+ rir_over_loft = (
+ self.property.roof["is_pitched"] and
+ self.property.roof["insulation_thickness"] == "none" and
+ "room_in_roof_insulation" in [x["type"] for x in non_invasive_recommendations]
+ )
+
# We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations
if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or (
self.property.roof["is_pitched"] and "loft_insulation" in measures and
not self.property.roof["is_at_rafters"]
- ):
+ ) and not rir_over_loft:
self.recommend_roof_insulation(
u_value=u_value,
insulation_thickness=self.insulation_thickness,
@@ -223,7 +234,8 @@ class RoofRecommendations:
# There are cases where the property might have a room roof as the second roof, but we have a recommendation for
# it, so we allow this override
if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or (
- "room_roof_insulation" in [x["type"] for x in non_invasive_recommendations]
+ "room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] or
+ rir_over_loft
):
self.recommend_room_roof_insulation(u_value, phase, default_u_values)
return
@@ -502,7 +514,7 @@ class RoofRecommendations:
# and the cost of the materials
rir_non_invasive_recommendation = next(
- (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
+ (x for x in self.property.non_invasive_recommendations if x["type"] == "room_in_roof_insulation"), {}
)
insulation_materials = pd.DataFrame(self.room_roof_insulation_materials)
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index a82e4df5..05113acf 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -10,11 +10,6 @@ class VentilationRecommendations(Definitions):
crucial for prevent overheating risks in warmer months
"""
- VENTILATION_DESCRIPTIONS = [
- 'mechanical, extract only',
- 'mechanical, supply and extract'
- ]
-
def __init__(
self,
property_instance: Property,
@@ -26,9 +21,6 @@ class VentilationRecommendations(Definitions):
self.recommendation = None
self.materials = [part for part in materials if part["type"] == "mechanical_ventilation"]
- def identify_ventilation(self):
- self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS
-
def recommend(self, phase):
"""
If there is no ventilation, we recommend installing ventilation
@@ -38,8 +30,8 @@ class VentilationRecommendations(Definitions):
:return:
"""
- self.identify_ventilation()
- if self.has_ventilaion:
+ self.property.identify_ventilation()
+ if self.property.has_ventilation:
return
if len(self.materials) != 1:
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 05b9ec42..6909a3f0 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -47,19 +47,19 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation):
# We bundle the impact of ventilation with the measure
total = (
rec["total"] + ventilation_recommendation["total"]
- if rec["type"] in assumptions.measures_needing_ventilation
+ if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec["total"]
)
gain = (
rec[goal_key] + ventilation_recommendation[goal_key]
- if rec["type"] in assumptions.measures_needing_ventilation
+ if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec[goal_key]
)
rec_type = (
"+".join(
[rec["type"], ventilation_recommendation["type"]]
- ) if rec["type"] in assumptions.measures_needing_ventilation
+ ) if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec["type"]
)
diff --git a/sfr/principal_pitch/0_prepare_sample.py b/sfr/principal_pitch/0_prepare_sample.py
new file mode 100644
index 00000000..8150d519
--- /dev/null
+++ b/sfr/principal_pitch/0_prepare_sample.py
@@ -0,0 +1,71 @@
+"""
+This is a script for preparing a sample for testing the end to end process, so that when Spring send us
+data, we know it will work.
+"""
+
+import pandas as pd
+from utils.s3 import read_csv_from_s3
+
+birmingham_epcs = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv"
+)
+
+# We get the newest EPC, by UPRN and LODGEMENT_DATE
+birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE'])
+
+birmingham_epcs = birmingham_epcs.sort_values(
+ by=['UPRN', 'LODGEMENT_DATE'],
+ ascending=[True, False]
+).drop_duplicates(subset='UPRN')
+
+birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0]
+
+addressable_market = birmingham_epcs[
+ (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])) &
+ (birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01') &
+ (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) &
+ (birmingham_epcs['TENURE'].isin(
+ ['rental (private)', 'Rented (private)']
+ ))
+ ]
+
+# We take the Spring portfolio and remove the properties in their sample
+asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
+asset_list = pd.DataFrame(asset_list)
+asset_list["postal_region"] = asset_list["postcode"].str.split(" ").str[0]
+
+addressable_market = addressable_market[
+ ~addressable_market["UPRN"].astype(int).astype(str).isin(asset_list["uprn"].values)
+]
+addressable_market = addressable_market[
+ addressable_market["postal_region"].isin(asset_list["postal_region"].unique())
+]
+
+# Take a sample of properties, EPC F or G, EPC lodged in 2025. We focus on houses/bungalows
+sample = birmingham_epcs[
+ (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])) &
+ (birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01') &
+ (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow']))
+ ]
+
+# Prepare the sample, with just the columns we would expect to receive from Spring
+# 1) UPRN
+# 2) Address
+# 3) Postcode
+# 4) Property type
+# 5) Built form
+# 6) Number of bedrooms (we'll simulate this)
+# 7) Number of bathrooms (we'll simulate this)
+# 8) Valuation (We'll simulate this, around 200,000)
+
+sample = sample[['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM']].copy()
+sample['BEDROOMS'] = 3 # Simulating number of bedrooms
+sample['BATHROOMS'] = 1 # Simulating number of bathrooms
+sample['VALUATION'] = 200000 # Simulating valuation
+sample.columns = [x.lower() for x in sample.columns]
+
+# Store this as an Excel file
+sample.to_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx",
+ index=False
+)
diff --git a/sfr/principal_pitch/1_prepare_data.py b/sfr/principal_pitch/1_prepare_data.py
new file mode 100644
index 00000000..53969ec9
--- /dev/null
+++ b/sfr/principal_pitch/1_prepare_data.py
@@ -0,0 +1,124 @@
+"""
+This script prepares the data for the principal pitch modelling
+"""
+import os
+import pandas as pd
+from dotenv import load_dotenv
+from utils.s3 import save_csv_to_s3
+from etl.find_my_epc.AssetListEpcData import AssetListEpcData
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+PORTFOLIO_ID = 206
+USER_ID = 8
+EPC_TARGET = "C"
+
+# Read the input file
+
+properties = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Birmingham_price_top300.xlsx"
+)
+# Keep just the D's and below
+properties = properties[properties["current_energy_rating"].isin(["D", "E", "F", "G"])].copy()
+# Focus on houses
+properties = properties[properties["property_type_std"] != "Flat"]
+properties = properties[properties["property_type"] != "flat"]
+
+# Rename the key columns
+properties = properties.rename(
+ columns={
+ "address1": "address",
+ "number_of_bathrooms": "n_bathrooms",
+ "num_beds": "n_bedrooms"
+ }
+)
+properties["patch"] = True
+
+# Pull the non-invasive recommendations
+asset_list_epc_client = AssetListEpcData(
+ asset_list=properties,
+ epc_auth_token=EPC_AUTH_TOKEN
+)
+asset_list_epc_client.get_data()
+asset_list_epc_client.get_non_invasive_recommendations()
+asset_list_epc_client.get_patch()
+
+extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data)
+epc_df = pd.DataFrame(asset_list_epc_client.epc_data)
+
+# Find examples where patches are different to the api
+compare_epc = []
+for patch in asset_list_epc_client.patches:
+ extracted = extracted_df[extracted_df["uprn"] == patch["uprn"]].squeeze()
+ epc = epc_df[epc_df["uprn"] == patch["uprn"]].squeeze()
+ compare_epc.append(
+ {
+ "uprn": extracted["uprn"],
+ "address": extracted["address"],
+ "postcode": extracted["postcode"],
+ "api_epc": int(extracted["current_epc_efficiency"]),
+ "fme_epc": int(epc["current-energy-efficiency"]),
+ }
+ )
+compare_epc = pd.DataFrame(compare_epc)
+diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]]
+# Compare matched addresses to make sure they are the same
+compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge(
+ epc_df[["uprn", "address1", "postcode"]].rename(columns={"address1": "epc_address1", "postcode": "epc_postcode"}),
+ how="left",
+ on=["uprn"]
+)
+
+# Add on uprn
+properties = properties.merge(
+ extracted_df[["address", "postcode", "uprn"]],
+ how="left",
+ on=["address", "postcode"]
+)
+
+# Store the asset list in s3
+filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+save_csv_to_s3(
+ dataframe=properties,
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=filename
+)
+
+# Store non-invasive recommendations in S3
+non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+save_csv_to_s3(
+ dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations),
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=non_invasive_recommendations_filename
+)
+
+# Store patches in S3
+patches_filename = ""
+if asset_list_epc_client.patches:
+ patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv"
+ save_csv_to_s3(
+ dataframe=pd.DataFrame(asset_list_epc_client.patches),
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=patches_filename
+ )
+
+body = {
+ "portfolio_id": str(PORTFOLIO_ID),
+ "housing_type": "Private",
+ "goal": "Increasing EPC",
+ "goal_value": "C",
+ "trigger_file_path": filename,
+ "already_installed_file_path": "",
+ "patches_file_path": patches_filename,
+ "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+ "valuation_file_path": "",
+ "scenario_name": "EPC C",
+ "multi_plan": True,
+ "budget": None,
+ "ashp_cop": 3.5,
+ # This is new - when optimising, we drop scores by a few points to account for SAP 10
+ "simulate_sap_10": True,
+ "exclusions": ["external_wall_insulation"],
+ "required_measures": ["cavity_wall_insulation", "loft_insulation"]
+}
+print(body)
diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py
new file mode 100644
index 00000000..5660b78d
--- /dev/null
+++ b/sfr/principal_pitch/2_export_data.py
@@ -0,0 +1,224 @@
+"""
+This script prepares the data for the financial model
+"""
+
+import pandas as pd
+from backend.app.utils import sap_to_epc
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+
+PORTFOLIO_ID = 206
+SCENARIOS = [389]
+
+
+def get_data(portfolio_id, scenario_ids):
+ session = sessionmaker(bind=db_engine)()
+ session.begin()
+
+ # Get properties and their details for a specific portfolio
+ properties_query = session.query(
+ PropertyModel,
+ PropertyDetailsEpcModel
+ ).join(
+ PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
+ ).filter(
+ PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
+ ).all()
+
+ # Transform properties data to include all fields dynamically
+ properties_data = [
+ {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
+ **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
+ PropertyDetailsEpcModel.__table__.columns}}
+ for prop in properties_query
+ ]
+
+ # Get property IDs from fetched properties
+
+ # Get plans belonging to the requested scenarios
+ plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+
+ # Transform plans data to include all fields dynamically
+ plans_data = [
+ {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+ for plan in plans_query
+ ]
+
+ # Extract plan IDs for filtering recommendations through PlanRecommendations
+ plan_ids = [plan['id'] for plan in plans_data]
+
+ # Get the default recommendations linked to those plans through PlanRecommendations
+ recommendations_query = session.query(
+ Recommendation,
+ Plan.scenario_id
+ ).join(
+ PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
+ ).join(
+ Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
+ ).filter(
+ PlanRecommendations.plan_id.in_(plan_ids),
+ Recommendation.default == True # Filtering for default recommendations
+ ).all()
+
+ # Transform recommendations data to include all fields dynamically and include scenario_id
+ recommendations_data = [
+ {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
+ col.name) for
+ col in Recommendation.__table__.columns},
+ "Scenario ID": rec.scenario_id}
+ for rec in recommendations_query
+ ]
+
+ session.close()
+
+ return properties_data, plans_data, recommendations_data
+
+
+properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
+
+properties_df = pd.DataFrame(properties_data)
+plans_df = pd.DataFrame(plans_data)
+recommendations_df = pd.DataFrame(recommendations_data)
+
+recommended_measures_df = recommendations_df[
+ ["property_id", "measure_type", "estimated_cost", "default"]
+]
+recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
+recommended_measures_df = recommended_measures_df.drop(columns=["default"])
+
+post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
+post_install_sap = post_install_sap[post_install_sap["default"]]
+# Sum up the sap points by property id
+post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
+
+recommendations_measures_pivot = recommended_measures_df.pivot(
+ index='property_id',
+ columns='measure_type',
+ values='estimated_cost'
+)
+recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
+
+# Total cost is the row sum, excluding the property_id column
+recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
+ columns=["property_id"]
+).sum(axis=1)
+
+df = properties_df[
+ [
+ "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
+ "current_epc_rating",
+ "current_sap_points", "total_floor_area", "number_of_rooms",
+ ]
+].merge(
+ recommendations_measures_pivot, how="left", on="property_id"
+).merge(
+ post_install_sap, how="left", on="property_id"
+)
+
+df = df.drop(columns=["property_id"])
+df["sap_points"] = df["sap_points"].fillna(0)
+
+df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
+df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round()
+df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
+
+# We merge this back to the main dataframe, which will contain the bathrooms
+from utils.s3 import read_csv_from_s3
+
+asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
+asset_list = pd.DataFrame(asset_list)
+df["uprn"] = df["uprn"].astype(str)
+asset_list = asset_list.merge(
+ df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
+ how="left",
+ on="uprn"
+)
+
+condition_costs = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
+ sheet_name="Prices - Khalim",
+ header=35
+)
+# Remove unnamed columns and reset index
+condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
+condition_costs = condition_costs.reset_index(drop=True)
+
+
+# We now estimate condition cost
+def simulate_condition(asset_list, condition_costs):
+ """
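+ Testing helper: simulates condition costs 1-10 for each property, to see what the costing array looks like.
+ :param asset_list: DataFrame of properties; each row provides "uprn", "bathrooms" and "total_floor_area"
+ :param condition_costs: DataFrame of cost rates with a "Condition" column and a "Bathroom" column
+ :return: asset_list with a "Condition N" total cost column merged on for each condition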
+ """
+
+ condition_df = []
+ for _, row in asset_list.iterrows():
+
+ n_bathrooms = row["bathrooms"]
+
+ conditions = {}
+ for condition in reversed(range(1, 11)):
+ condition_cost = condition_costs[
+ condition_costs["Condition"] == condition
+ ].drop(columns=["Condition"]).iloc[0]
+
+ # Each cost is scaled by floor area
+ condition_cost = condition_cost * row["total_floor_area"]
+ condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
+
+ total_condition_cost = condition_cost.sum()
+ conditions["Condition " + str(condition)] = (total_condition_cost)
+
+ condition_df.append(
+ {
+ "uprn": row["uprn"],
+ **conditions
+ }
+ )
+
+ condition_df = pd.DataFrame(condition_df)
+
+ asset_list = asset_list.merge(
+ condition_df,
+ how="left",
+ on="uprn"
+ )
+
+ return asset_list
+
+
+# asset_list = simulate_condition(asset_list, condition_costs)
+
+# We calculate the condition cost based on the condition
+for _, row in asset_list.iterrows():
+
+ condition = row["condition_score"]
+ if condition in [None, ""]:
+ continue
+ condition = int(float(condition))
+
+ condition_cost = condition_costs[
+ condition_costs["Condition"] == condition
+ ].drop(columns=["Condition"]).iloc[0]
+
+ # Each cost is scaled by floor area
+ condition_cost = condition_cost * float(row["total_floor_area"])
+ n_bathrooms = row["n_bathrooms"]
+ condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
+
+ total_condition_cost = condition_cost.sum()
+ asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost
+
+# Store output
+asset_list.to_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
+ index=False
+)
+
+condition_cost_comparison = asset_list[
+ ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
+]