Merge pull request #446 from Hestia-Homes/main

SFR project + app debugging
This commit is contained in:
KhalimCK 2025-07-14 10:41:24 +01:00 committed by GitHub
commit 831faa31f8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
51 changed files with 5532 additions and 708 deletions

File diff suppressed because it is too large Load diff

229
asset_list/abs_estimates.py Normal file
View file

@ -0,0 +1,229 @@
"""
Simple script that takes a standardised asset list and estimates the ABS (annual bill savings, as given by
the ECO4 score matrices) for each property. This code is the approach we'll use to estimate the ABS for
properties going forward.
"""
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
from backend.Funding import Funding
from backend.app.utils import sap_to_epc
# --- Inputs -----------------------------------------------------------------
# Every location below is a machine-specific absolute path; edit these before
# running the script anywhere else.
_ASSET_LIST_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx"
)
_ASSET_LIST_SHEET = "Cavity Route (Insta Review)"
_FULL_PROJECT_SCORES_CSV = "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
_PARTIAL_PROJECT_SCORES_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx"
)

# The EPC API token is read from the environment after loading the backend .env file.
# os.environ.get returns None when the variable is absent; nothing here validates it.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Standardised asset list (one sheet of the workbook).
asset_list = pd.read_excel(_ASSET_LIST_WORKBOOK, sheet_name=_ASSET_LIST_SHEET)

# Full project scores: joined later on starting band, finishing band and floor area segment.
abs_matrix = pd.read_csv(_FULL_PROJECT_SCORES_CSV)

# Partial project scores: header=1 takes the column labels from the second row of the sheet.
pps_matrix = pd.read_excel(_PARTIAL_PROJECT_SCORES_WORKBOOK, header=1)
# Strip stray whitespace from the labels so later column lookups match exactly.
pps_matrix.columns = [label.strip() for label in pps_matrix.columns]
# To price the works we need the SAP points each measure is expected to add, and from
# that the finishing band. The points are taken from the EPC recommendation data
# fetched below.
# NOTE(review): an earlier version of this comment mentioned flat assumptions of 7 points
# for cavity and 15 for solar; no such constants are applied anywhere in this script, so
# that note looks stale - confirm.

# Asset-list column -> name passed to the EPC client.
# NOTE(review): "upr" may be a typo for "uprn" - confirm against AssetListEpcData.
route_columns = {
    "domna_address_1": "address",
    "domna_postcode": "postcode",
    "epc_os_uprn": "upr",
}
route = asset_list[list(route_columns)].rename(columns=route_columns)
# Addresses are cast to str so every row carries a string address.
route["address"] = route["address"].astype(str)

# Fetch EPC data for the route, then the non-invasive recommendations that are read
# back from `asset_list_epc_client.non_invasive_recommendations` further down.
asset_list_epc_client = AssetListEpcData(asset_list=route, epc_auth_token=EPC_AUTH_TOKEN)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
def _solar_pv_row(result):
    """Return the address/postcode/SAP-points row for one EPC result, or None.

    Only the first recommendation whose "type" contains "solar_pv" is used; results
    with no recommendations, or with no solar PV recommendation, yield None.
    """
    solar_matches = [
        rec for rec in (result.get("recommendations") or []) if "solar_pv" in rec["type"]
    ]
    if not solar_matches:
        return None
    return {
        "address": result["address"],
        "postcode": result["postcode"],
        "sap_points": solar_matches[0]["sap_points"],
    }


# One row per property that has a solar PV recommendation.
_solar_rows = (
    _solar_pv_row(result) for result in asset_list_epc_client.non_invasive_recommendations
)
solar_sap_points = pd.DataFrame([row for row in _solar_rows if row is not None])
# Keep a single row per (address, postcode) so the later left-merge cannot fan out.
solar_sap_points = solar_sap_points.drop_duplicates(subset=["address", "postcode"])
# Store the sap points in the cavity route to csv
# cwi_sap_points.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv",
# index=False
# )
# Fallback uplift values for properties without their own solar PV recommendation:
# the mean per postcode first, then the median across all properties.
avg_solar_points_by_postcode = solar_sap_points.groupby("postcode", as_index=False)["sap_points"].mean()
avg_solar_points = solar_sap_points["sap_points"].median()

# Attach each property's own solar SAP points (addresses compared as strings).
asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
asset_list = asset_list.merge(
    solar_sap_points,
    how="left",
    left_on=["domna_address_1", "domna_postcode"],
    right_on=["address", "postcode"],
)
asset_list = asset_list.drop(columns=["address", "postcode"])

# Fill missing SAP points with the postcode average for solar, then with the overall median.
postcode_fallback = avg_solar_points_by_postcode.rename(
    columns={"postcode": "domna_postcode", "sap_points": "sap_points_avg"}
)
asset_list = asset_list.merge(postcode_fallback, how="left", on="domna_postcode")
asset_list["sap_points"] = (
    asset_list["sap_points"].fillna(asset_list["sap_points_avg"]).fillna(avg_solar_points)
)
asset_list = asset_list.drop(columns=["sap_points_avg"])

# Post-works SAP and the bands used to look up the score matrix.
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(sap_to_epc)
asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(Funding.get_sap_band)
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(Funding.get_sap_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(Funding.get_floor_area_band)

# Realistically, we'll take the properties to a low C at worst: force "Low_C" when the
# works would not change the EPC rating, or when the post-works SAP is still below 69.
rating_unchanged = asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]
below_threshold = asset_list["post_works_sap"] < 69
asset_list["ending_half_band"] = np.where(
    rating_unchanged | below_threshold,
    "Low_C",
    asset_list["ending_half_band"],
)

# Look up the full project score and expose it as `funding_abs`.
matrix_key_columns = ['Starting Band', 'Finishing Band', 'Floor Area Segment']
asset_list = (
    asset_list.merge(
        abs_matrix,
        how="left",
        left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
        right_on=matrix_key_columns,
    )
    .drop(columns=matrix_key_columns)
    .rename(columns={"Cost Savings": "funding_abs"})
)

# Sanity check: number of repeated property ids after the merges.
print(asset_list["domna_property_id"].duplicated().sum())

# Store this data
asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv",
    index=False
)
# Cavity process!
#
# Estimates the ABS for cavity wall insulation (CWI), using the same steps as the solar
# estimate above: per-property SAP points from the EPC recommendations, postcode/overall
# fallbacks, post-works bands, then a score-matrix lookup.

# Collect the SAP points of the first cavity wall insulation recommendation per property.
# (This loop was previously commented out, which left `cwi_sap_points` undefined and made
# everything below fail with a NameError.)
cwi_sap_points = []
for r in asset_list_epc_client.non_invasive_recommendations:
    cwi_recommendations = [
        x for x in (r.get("recommendations") or []) if "cavity_wall_insulation" in x["type"]
    ]
    if not cwi_recommendations:
        continue
    cwi_sap_points.append(
        {
            "address": r["address"],
            "postcode": r["postcode"],
            "sap_points": cwi_recommendations[0]["sap_points"],
        }
    )

# Explicit columns keep the frame usable even if no property has a cavity recommendation.
cwi_sap_points = pd.DataFrame(cwi_sap_points, columns=["address", "postcode", "sap_points"])

# A previously exported copy can be loaded instead of re-deriving the points:
# cwi_sap_points = pd.read_csv(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
# )

cwi_sap_points = cwi_sap_points.drop_duplicates(subset=["address", "postcode"])

# Fallback values: mean per postcode, then the median across all properties.
avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
avg_cwi_points = cwi_sap_points["sap_points"].median()

# If the solar estimate has already run, `asset_list` still carries its derived columns.
# Drop them first: otherwise the merges below produce `sap_points_x`/`sap_points_y` and the
# fill step raises a KeyError. errors="ignore" keeps this a no-op on a fresh asset list.
# NOTE(review): any other score-matrix columns left over from the solar merge are not
# removed here and will come out of the merge below with `_x`/`_y` suffixes.
asset_list = asset_list.drop(
    columns=[
        "sap_points",
        "post_works_sap",
        "post_works_epc",
        "starting_half_band",
        "ending_half_band",
        "floor_area_band",
        "funding_abs",
    ],
    errors="ignore",
)

# Addresses are compared as strings (idempotent if the cast has already been applied).
asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
asset_list = asset_list.merge(
    cwi_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
).drop(
    columns=["address", "postcode"]
)

# Fill the sap points with the average cwi points
asset_list = asset_list.merge(
    avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
    how="left", on=["domna_postcode"], suffixes=("", "_avg")
)
asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
asset_list = asset_list.drop(columns=["sap_points_avg"])
asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_cwi_points)

# Post-works SAP and the bands used to look up the score matrices.
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(sap_to_epc)
asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(Funding.get_sap_band)
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(Funding.get_sap_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(Funding.get_floor_area_band)

# Properties whose EPC rating does not change after the works are labelled GBIS;
# everything else is labelled ECO4.
asset_list["funding_scheme"] = np.where(
    asset_list["post_works_epc"] == asset_list["epc_rating_on_register"],
    "GBIS",
    "ECO4"
)

# Full project score lookup (adds the "Cost Savings" column used for ECO4 below).
asset_list = asset_list.merge(
    abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
    right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
)
asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])

# Partial project score for the "CWI_0.033" measure type.
# NOTE(review): the original comment here read "Using CWI solid 1.7 -> 0.3 rates", which
# does not obviously correspond to the "CWI_0.033" filter - confirm the intended measure.
cwi_pps_matrix = pps_matrix[
    pps_matrix["Measure_Type"].isin(["CWI_0.033"])
]

# Merge on
asset_list = asset_list.merge(
    cwi_pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
        columns={
            "Cost Savings": "partial_project_score",
            "Starting Band": "starting_half_band",
            "Total Floor Area Band": "floor_area_band"
        }
    ),
    how="left",
    on=["starting_half_band", "floor_area_band"],
)

# No partial project score for properties whose registered SAP is already above 69.
asset_list["partial_project_score"] = np.where(
    (asset_list["epc_sap_score_on_register"] > 69),
    None,
    asset_list["partial_project_score"]
)

# GBIS rows use the partial project score; ECO4 rows use the full project score.
asset_list["funding_abs"] = np.where(
    asset_list["funding_scheme"] == "GBIS",
    asset_list["partial_project_score"],
    asset_list["Cost Savings"]
)

# Sanity check: number of repeated property ids after the merges. Printed, as in the
# solar section, because a bare expression is discarded when run as a script.
print(asset_list["domna_property_id"].duplicated().sum())

# Store this data
asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv",
    index=False
)

View file

@ -1,9 +1,6 @@
import os
import json
import pandas as pd
from pprint import pprint
import msgpack
from utils.s3 import read_from_s3
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@ -62,98 +59,558 @@ def app():
Property UPRN
"""
# Thurrock
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
data_filename = "THURROCK COUNCIL - For analysis.xlsx"
sheet_name = "Assets"
# CDS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS"
data_filename = "Founder Estates - Asset List.xlsx"
sheet_name = "Combined"
postcode_column = 'Postcode'
fulladdress_column = "Full Address"
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Construction Date"
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Property Subtype"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Main Heating Type"
landlord_existing_pv = None
landlord_property_id = "Property Reference"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
data_filename = "MEDWAY Asset List.xlsx"
sheet_name = "Asset list"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "House Number"
address1_method = None
address_cols_to_concat = ["House Number", "Street 1"]
missing_postcodes_method = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Property Type - Academy"
landlord_built_form = "Property Type - Academy"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
outcomes_id = []
master_filepaths = []
master_to_asset_list_filepath = None
asset_list_header = 0
landlord_block_reference = None
master_id_colnames = []
landlord_roof_construction = None
phase = False
landlord_sap = None
ecosurv_landlords = None
# MHS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
# Plus Dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/"
data_filename = "20250711 Plus Dane Asset List.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
fulladdress_column = "FullAddress"
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "BuiltInYear"
landlord_year_built = "Property Age"
landlord_os_uprn = None
landlord_property_type = "AssetType"
landlord_built_form = "PropertyType"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_property_type = "Property Type"
landlord_built_form = "Built Form"
landlord_wall_construction = "Wall Construction"
landlord_heating_system = "Full Heating System"
landlord_existing_pv = None
landlord_property_id = "UPRN"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
outcomes_filename = [
os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"),
os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"),
os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"),
]
outcomes_sheetname = [
"CWI & LI - 2024", "2025 - CWI", "PV - 2025",
]
outcomes_postcode = ["Postcode", "Postcode", "Postcode"]
outcomes_houseno = ["No.", "No", "No"]
outcomes_address = ["Address", "Address", "Address"]
outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"]
master_filepaths = [
os.path.join(data_folder, "submissions/JJC-Table 1.csv"),
os.path.join(data_folder, "submissions/SCIS-Table 1.csv")
]
master_to_asset_list_filepath = None
asset_list_header = 1
landlord_block_reference = None
master_id_colnames = [None, None]
landlord_roof_construction = None
phase = False
ecosurv_landlords = None
landlord_sap = "SAP Rating"
ecosurv_landlords = "plus dane"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
# data_filename = "20250710 Asset List Brentwood.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "House Number"
# address1_method = None
# address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_os_uprn = None
# landlord_property_type = "Dwelling"
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
# outcomes_sheetname = ["OUTCOMES"]
# outcomes_postcode = ["POSTCODE"]
# outcomes_houseno = [None]
# outcomes_address = ["ADDRESS"]
# outcomes_id = [None]
# master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
# master_to_asset_list_filepath = None
# asset_list_header = 1
# landlord_block_reference = None
# master_id_colnames = [None]
# landlord_roof_construction = None
# phase = False
# landlord_sap = None
# ecosurv_landlords = "brentwood"
# Brentwood
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
# data_filename = "20250710 Asset List Brentwood.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "House Number"
# address1_method = None
# address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_os_uprn = None
# landlord_property_type = "Dwelling"
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
# outcomes_sheetname = ["OUTCOMES"]
# outcomes_postcode = ["POSTCODE"]
# outcomes_houseno = [None]
# outcomes_address = ["ADDRESS"]
# outcomes_id = [None]
# master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
# master_to_asset_list_filepath = None
# asset_list_header = 1
# landlord_block_reference = None
# master_id_colnames = [None]
# landlord_roof_construction = None
# phase = False
# landlord_sap = None
# ecosurv_landlords = "brentwood"
#
# # Eastlight
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme"
# data_filename = "INSPECTIONS MASTER Non Tech.xlsx"
# sheet_name = "EASTLIGHT CW"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "HouseName"
# address1_method = None
# address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"]
# missing_postcodes_method = None
# landlord_year_built = "Built In Year"
# landlord_os_uprn = None
# landlord_property_type = "AssetType"
# landlord_built_form = "Archetype" # Using inspections archetype
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = "Main Heating Source"
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# landlord_sap = "SAP Score"
# outcomes_filename = [
# os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"),
# os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"),
# ]
# outcomes_sheetname = ["Outcomes", "Feedback"]
# outcomes_postcode = ["Postcode", "Postcode"]
# outcomes_houseno = ["No", "No."]
# outcomes_id = [None, None]
# outcomes_address = ["Address", "Address"]
# master_filepaths = [
# os.path.join(data_folder, "ECO 3-Table 1.csv"),
# os.path.join(data_folder, "ECO 4-Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "eastlight"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None]
# landlord_sap = None
# Pickering and Ferens
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens"
# data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx"
# sheet_name = "Sava Intelligent Energy - Prope"
# postcode_column = 'Postcode'
# fulladdress_column = 'Address'
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = "Property Type" # Using the inspections property type
# landlord_built_form = "Archetype 2"
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# landlord_sap = "SAP Rating (RdSAP 10)"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = [
# os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"),
# os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "pickering"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None]
# Colchester
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Full Address.1'
# fulladdress_column = "Full Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_wall_construction = "Wallinsul"
# landlord_heating_system = "HeatSorc"
# landlord_existing_pv = None
# landlord_property_id = "Property Reference"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# asset_list_header = 0
# landlord_built_form = None
# landlord_roof_construction = None
# landlord_sap = None
# landlord_block_reference = None
# phase = False
# ecosurv_landlords = None
# master_id_colnames = []
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot"
# data_filename = "EalingFlats.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None # Using the inspections property type
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# landlord_sap = None
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = "Block Ref"
# master_id_colnames = []
# Southern - Jan list
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
# data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
# sheet_name = "Jan 2025 additions"
# postcode_column = 'Post Code'
# fulladdress_column = None
# address1_column = "NO."
# address1_method = None
# address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None # Using the inspections property type
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "SH Property Reference"
# landlord_sap = None
# outcomes_filename = [
# os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
# os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
# ]
# outcomes_sheetname = ["Feedback", "Collated"]
# outcomes_postcode = ["Poscode", "Postcode"]
# outcomes_houseno = ["No.", "No"]
# outcomes_id = ["UPRNs", None]
# outcomes_address = ["Address", "Address"]
# master_filepaths = [
# os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "southern"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None, None, None]
# NCHA
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
# data_filename = "Energy Information MASTER June 2025.xlsx"
# sheet_name = "Data"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date (HAR10)"
# landlord_os_uprn = None
# landlord_property_type = "Property Type (HAR10)"
# landlord_built_form = "Build Form (EPC)"
# landlord_wall_construction = "Wall Description"
# landlord_roof_construction = None
# landlord_heating_system = "HEAT Code"
# landlord_existing_pv = None
# landlord_property_id = "Place ref"
# landlord_sap = "EPC SAP"
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = []
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
# data_filename = "07.04 CALICO - Final List.xlsx"
# asset_list_header = 2
# sheet_name = "Final List"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "Property Number / Name"
# address1_method = None
# address_cols_to_concat = [
# "Property Number / Name",
# "Street",
# "Town"
# ]
# missing_postcodes_method = None
# landlord_year_built = "NROSH Estimated Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Asset Type"
# landlord_built_form = None
# landlord_wall_construction = "Wall Type"
# landlord_heating_system = "Boiler Type"
# landlord_existing_pv = None
# landlord_property_id = "Asset Reference"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# landlord_roof_construction = None
# landlord_block_reference = None
# landlord_sap = "Current Efficiency Rating - Score"
# phase = None
# ecosurv_landlords = None
# data_folder = (
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
# List"
# )
# data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
# sheet_name = "Assets"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Year"
# landlord_os_uprn = None
# landlord_property_type = "Property Archetype"
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_heating_system = "Heating Fuel Type"
# landlord_existing_pv = None
# landlord_property_id = "Uprn - DO NOT DELETE"
# outcomes_filename = [
# os.path.join(data_folder, "RT - LiveWest.xlsx")
# ]
# outcomes_sheetname = ["Feedback"]
# outcomes_postcode = ["Poscode"]
# outcomes_houseno = ["No."]
# outcomes_id = ["UPRN"]
# outcomes_address = ["Address"]
# master_filepaths = [
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
# Master "
# "- redacted for analysis/CAVITY-Table 1.csv"
# ]
# master_id_colnames = [None]
# master_to_asset_list_filepath = None
# landlord_roof_construction = None
# landlord_block_reference = None
# landlord_sap = None
# phase = None
# ecosurv_landlords = "livewest|live west"
# data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
# "2025/Livewest Asset List (Original) - csv")
# data_filename = "Report-Table 1.csv"
# sheet_name = None
# postcode_column = 'Postcode'
# fulladdress_column = "T1_Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Yr"
# landlord_os_uprn = None
# landlord_property_type = "T1_AssetType"
# landlord_built_form = "T1_AssetType"
# landlord_wall_construction = "Wall Type Cavity"
# landlord_heating_system = "Heating Fuel"
# landlord_existing_pv = None
# landlord_property_id = "T1_UPRN"
# outcomes_filename = [
# os.path.join(data_folder, "RT - LiveWest.xlsx")
# ]
# outcomes_address = ["Address"]
# outcomes_sheetname = ["Feedback"]
# outcomes_postcode = ["Poscode"]
# outcomes_houseno = ["No."]
# outcomes_id = ["UPRN"]
# master_filepaths = [
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
# Master "
# "- redacted for analysis/CAVITY-Table 1.csv"
# ]
# master_id_colnames = [None]
# master_to_asset_list_filepath = None
# landlord_roof_construction = None
# landlord_block_reference = None
# landlord_sap = None
# phase = None
# ecosurv_landlords = "livewest|live west"
# Stori
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
# data_filename = "Asset list - for analysis.xlsx"
# sheet_name = "SAP and Costs Calculations"
# postcode_column = 'Postcode'
# fulladdress_column = "Address1"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Age"
# landlord_os_uprn = None
# landlord_property_type = "TYPE"
# landlord_built_form = "AGE / DETACHMENT"
# landlord_wall_construction = "WALL"
# landlord_roof_construction = "LOFT INSULATION"
# landlord_heating_system = "BOILER"
# landlord_existing_pv = "SOLAR PV"
# landlord_property_id = "UPRN"
# landlord_sap = "Current SAP Rating"
# landlord_block_reference = None
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# master_id_colnames = []
# phase = False
# ecosurv_landlords = None
# Thrive - reconciliation
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
# data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'postcode'
# fulladdress_column = "full_address"
# address1_column = "address_line_1"
# address1_method = None
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "age_band_calculated"
# landlord_os_uprn = None
# landlord_property_type = "property_type"
# landlord_built_form = "build_form"
# landlord_wall_construction = None
# landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
# landlord_heating_system = "heating_type_updated"
# landlord_existing_pv = None
# landlord_property_id = "thrive_property_id"
# landlord_sap = "sap_rating_updated"
# landlord_block_reference = "block_reference"
# outcomes_filename = [
# os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
# ]
# outcomes_sheetname = ["Sheet1"]
# outcomes_postcode = ["postcode"]
# outcomes_houseno = ["No."]
# outcomes_id = ["thrive_property_id"]
# outcomes_address = ["address"]
# master_filepaths = [
# os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
# os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
# ]
# master_to_asset_list_filepath = None
# master_id_colnames = ["thrive_property_id", "thrive_property_id"]
# phase = False
# ecosurv_landlords = "thrive"
# Southern Midlands
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
@ -182,40 +639,12 @@ def app():
# master_filepaths = []
# master_to_asset_list_filepath = None
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
landlord_sap = None
phase = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
asset_list = AssetList(
local_filepath=os.path.join(data_folder, data_filename),
header=0,
header=asset_list_header,
sheet_name=sheet_name,
address1_colname=address1_column,
postcode_colname=postcode_column,
@ -233,6 +662,7 @@ def app():
landlord_heating_system=landlord_heating_system,
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase
)
asset_list.init_standardise()
@ -294,7 +724,8 @@ def app():
asset_list.flag_survey_master(
master_filepaths=master_filepaths,
master_to_asset_list_filepath=master_to_asset_list_filepath
master_to_asset_list_filepath=master_to_asset_list_filepath,
master_id_colnames=master_id_colnames,
)
asset_list.flag_ecosurv(ecosurv_landlords)
@ -306,7 +737,7 @@ def app():
epc_api_only = False
force_retrieve_data = False
skip = None # Used to skip already completed chunks
chunk_size = 5000
chunk_size = 2000
filename = "Chunk {i}.csv"
download_folder = os.path.join(data_folder, "Chunks")
if not os.path.exists(download_folder):
@ -486,59 +917,13 @@ def app():
)
asset_list.merge_data(epc_df)
asset_list.extract_attributes()
asset_list.identify_worktypes()
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
)
cleaned = msgpack.unpackb(cleaned, raw=False)
asset_list.identify_worktypes(cleaned)
pprint(asset_list.work_type_figures)
asset_list.flat_analysis()
asset_list.load_contact_details(
local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
sheet_name="Report 1",
landlord_property_id=asset_list.landlord_property_id,
phone_number_column='Property Current Tel. Number',
fullname_column='Proeprty Current Occupant',
firstname_column=None,
lastname_column=None,
email_column=None, # TODO - we need this
)
# Convert to a format suitable for CRM
# TODO: TEMP
assigned_surveyors = pd.DataFrame(
[
{
asset_list.landlord_property_id: "02610001",
"week_commencing": "10/10/2025",
"surveyor_name": "Khalim Conn-Kowlessar",
"surveyor_email": "khalim@domna.homes",
}
]
)
# TODO: Sort the output by postcode
company_domain = "ealing.gov.uk"
crm_pipeline_name = "Survey Management"
first_dealstage = "READY TO BEGIN SCHEDULING"
# TODO - temp, upload to either SharePoint or AWS
asset_list.prepare_for_crm(
assigned_surveyors=assigned_surveyors,
company_domain=company_domain,
crm_pipeline_name=crm_pipeline_name,
first_dealstage=first_dealstage
)
hubspot_data = asset_list.hubspot_data
# We now flag the status of the property
asset_list.label_property_status()
asset_list.analyse_geographies()
asset_list.get_work_figures()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
@ -546,7 +931,8 @@ def app():
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
if asset_list.block_analysis_df is not None:
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
@ -560,5 +946,5 @@ def app():
if not asset_list.ecosurv_no_match.empty:
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
# Store the Hubspot export as a csv
hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
if not asset_list.geographical_areas.empty:
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)

View file

@ -0,0 +1,85 @@
from enum import IntEnum, Enum
# Name of the CRM pipeline used for housing association operations.
# NOTE(review): presumably the value written to the 'Pipeline <DEAL pipeline>' upload column - confirm against usage.
CRM_PIPELINE_NAME = 'Operations - Housing Associations'
class HubspotProcessStatus(IntEnum):
    """
    Ordered process statuses for a property.

    Every member is declared as ``(order, label)``. The integer becomes the member's value and the label is
    attached to the member as ``.label``. The integers themselves carry no meaning beyond defining the order
    of operations.
    """

    def __new__(cls, value, label):
        # Create the int-backed member, then attach the human readable label to it
        member = int.__new__(cls, value)
        member.label = label
        member._value_ = value
        return member

    # First stage: the survey is ready to go
    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"

    # No access was gained to the property and this needs sign off
    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"

    # Survey completed. There is no update yet as to whether the property has been installed
    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF"

    # The property turned out to be ineligible
    NOT_VIABLE = 4, "NOT VIABLE"

    # The property is with the installer. Likely to be the default for historic programmes
    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"

    # The install has been carried out
    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"

    # The install is complete and so is the lodgement
    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"

    # The property has been cancelled
    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"
class Installer(Enum):
    """
    Known installers. Each member's value is the installer's name as a string.
    """
    SCIS = "SCIS"
    JJ_CRUMP = "J & J CRUMP"
    SGEC = "SGEC"

    @classmethod
    def is_valid_value(cls, value):
        """
        Check if the value is a valid installer.

        The comparison is an exact (case-sensitive) equality check against the member values. Members are
        compared through the public iteration API rather than the private ``_value2member_map_`` lookup, so
        unhashable input (e.g. a list) is reported as invalid instead of raising ``TypeError``.

        :param value: candidate installer name
        :return: True if ``value`` equals the value of one of the members, False otherwise
        """
        return any(member.value == value for member in cls)
# Column headers of the CRM upload, in order. Headers carry a "<OBJECT property>" suffix naming the object
# (LISTING, COMPANY, CONTACT, DEAL, LINE_ITEM) and the property the column belongs to.
CRM_UPLOAD_COLUMNS = [
    # Listing name, association and company
    "Name <LISTING hs_name>",
    "Associations: Listing",
    "Company Domain Name <COMPANY domain>",
    # Contact
    "Email <CONTACT email>",
    "First Name <CONTACT firstname>",
    "Last Name <CONTACT lastname>",
    "Phone <CONTACT phone>",
    "Secondary Phone <CONTACT secondary_phone_number>",
    "Secondary Contact Full Name <CONTACT secondary_contact_full_name>",
    # Listing: owner, address and property attributes
    "Listing Owner Email <LISTING hubspot_owner_id>",
    "Full Address <LISTING full_address>",
    "Address 1 <LISTING hs_address_1>",
    "Address 2 <LISTING hs_address_2>",
    "Postcode <LISTING hs_zip>",
    "Property Type <LISTING property_type>",
    "Property Sub Type <LISTING property_sub_type>",
    "Bedroom(s) <LISTING hs_bedrooms>",
    "Domna Property ID <LISTING domna_property_id>",
    "National UPRN <LISTING national_uprn>",
    "Owner Property ID <LISTING owner_property_id>",
    "Wall Construction <LISTING wall_construction>",
    "Heating System <LISTING heating_system>",
    "Year Built <LISTING hs_year_built>",
    "Boiler Make <LISTING boiler_make>",
    "Boiler Model <LISTING boiler_model>",
    # Listing: non-intrusive survey
    "Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>",
    "Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>",
    "Non-intrusives: Insulation <LISTING non_intrusives__insulation>",
    "Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>",
    "Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>",
    "Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>",
    "Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>",
    "Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>",
    "Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>",
    # Listing: CIGA
    "CIGA: Date Requested <LISTING ciga__date_requested>",
    "CIGA: Cavity Guarantee Found <LISTING ciga__cavity_guarantee_found>",
    # Listing: last EPC
    "Last EPC: Is Estimated <LISTING last_epc__is_estimated>",
    "Last EPC: EPC Rating <LISTING last_epc__epc_rating>",
    "Last EPC: SAP Rating <LISTING last_epc__sap_rating>",
    "Last EPC: Main Heating Description <LISTING last_epc__main_heating_description>",
    "Last EPC: Heating Controls <LISTING last_epc__heating_controls>",
    "Last EPC: Lodgement Date <LISTING last_epc__lodgement_date>",
    "Last EPC: Floor Area <LISTING last_epc__floor_area>",
    "Last EPC: Wall <LISTING last_epc__wall>",
    "Last EPC: Roof <LISTING last_epc__roof>",
    "Last EPC: Floor <LISTING last_epc__floor>",
    "Last EPC: Room Height <LISTING last_epc__room_height>",
    "Last EPC: Age Band <LISTING last_epc__age_band>",
    # Deal
    "Deal Stage <DEAL dealstage>",
    "Pipeline <DEAL pipeline>",
    "Expected Commencement Date <DEAL expected_commencement_date>",
    "Deal Name <DEAL dealname>",
    "Project Code <DEAL project_code>",
    "Postcode <DEAL postcode>",
    # Line item
    "Product ID <LINE_ITEM hs_product_id>",
    "Name <LINE_ITEM name>",
    "Unit price <LINE_ITEM price>",
    "Quantity <LINE_ITEM quantity>",
    # Deal owner, amount and installer
    "Deal Owner",
    "Amount <DEAL amount>",
    "Installer <DEAL installer>",
]

View file

@ -0,0 +1,176 @@
import os
import pandas as pd
from asset_list.AssetList import AssetList
import re
def normalize_uk_phone(number: str | float | int) -> str | None:
if pd.isna(number):
return None
number = str(number)
number = re.sub(r"[^\d+]", "", number)
# Handle common short inputs: add '0' if likely missing
if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number):
number = "0" + number
# Convert to international format
if number.startswith("0"):
number = "+44" + number[1:]
elif number.startswith("0044"):
number = "+" + number[2:]
# Must be +44 followed by 10 digits (some area codes may vary)
if re.match(r"^\+44\d{9,10}$", number):
return number
return None
def app():
    """
    Build the HubSpot upload csv for a standardised asset list.

    Loads the standardised asset list and the contact details, prepares the CRM data and stores it as a csv
    in the same folder as the asset list, with " - Hubspot Upload" appended to the asset list's file name.

    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
        review. So, we will need to update the hubspot status for these entries and set them to None, if they
        were previously being set to ready for scheduling. We don't want to just filter on rows where
        cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
        them
    TODO: Support uploading deals in batches, if we wish to

    :raises ValueError: if any row of the prepared HubSpot data is missing a project code or a deal stage
    :return:
    """
    # inputs:
    reconcile_programme = True  # If True, the hubspot upload will include all properties with a project code
    # NOTE(review): the value includes the URL scheme - confirm prepare_for_crm expects this rather than the
    # bare domain
    customer_domain = "https://ealing.gov.uk"
    installer_name = "SCIS"

    asset_list_filepath = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
        "programme.xlsx"
    )
    asset_list_sheet_name = "Standardised Asset List"
    asset_list_header = 0

    contact_details_filepath = None
    contacts_sheet_name = "Sheet 1"
    contacts_landlord_property_id = "UPRN"
    contacts_phone_number_column = "phone_number"
    contacts_secondary_phone_number_column = "secondary_phone_number"
    contacts_secondary_contact_full_name = "secondary_contact_full_name"
    contacts_email_column = "email"
    contacts_fullname_column = "fullname"
    contacts_firstname_column = "First Name"
    contacts_lastname_column = "Last Name"

    existing_programme_filepath = None

    asset_list = AssetList.load_standardised_asset_list(
        asset_list_filepath, asset_list_sheet_name, asset_list_header
    )

    asset_list.load_contact_details(
        local_filepath=contact_details_filepath,
        sheet_name=contacts_sheet_name,
        landlord_property_id=contacts_landlord_property_id,
        phone_number_column=contacts_phone_number_column,
        secondary_phone_number_column=contacts_secondary_phone_number_column,
        secondary_contact_full_name=contacts_secondary_contact_full_name,
        email_column=contacts_email_column,
        fullname_column=contacts_fullname_column,
        firstname_column=contacts_firstname_column,
        lastname_column=contacts_lastname_column
    )

    asset_list.prepare_for_crm(
        company_domain=customer_domain,
        installer_name=installer_name,
        reconcile_programme=reconcile_programme
    )

    # Remove the existing programme
    # existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
    # asset_list.hubspot_data = asset_list.hubspot_data[
    #     ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
    #         existing_programme['Domna Property ID'].values
    #     )
    # ]

    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
    directory, filename = os.path.split(asset_list_filepath)
    name = os.path.splitext(filename)[0]
    output_filename = f"{name} - Hubspot Upload.csv"
    output_filepath = os.path.join(directory, output_filename)

    # Refuse to write the export if a row is missing the data a deal needs; each check reports the column
    # it actually inspected
    if pd.isnull(asset_list.hubspot_data['Project Code <DEAL project_code>']).any():
        raise ValueError("Some rows have missing project codes. The HubSpot upload has not been created.")

    if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
        raise ValueError("Some rows have missing deal stages. The HubSpot upload has not been created.")

    # Just store locally
    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
# # TODO: Set this up separately, but we associate multiple contacts to the same deal
# contact_details = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
# "Upload/Hubspot/contact "
# "details.csv"
# )
#
# # contacts_phone_number_column = "phone_number"
# # contacts_secondary_phone_number_column = "secondary_phone_number"
# # contacts_secondary_contact_full_name = "secondary_contact_full_name"
# # contacts_email_column = "email"
# # contacts_fullname_column = "fullname"
# # contacts_firstname_column = "First Name"
# # contacts_lastname_column = "Last Name"
# contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
# # If phone number is NaN, we will use the landline number
# contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
# contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
# # If secondary phone number is the same as primary, we remove it
# import numpy as np
# contact_details["secondary_phone_number"] = np.where(
# contact_details["secondary_phone_number"] == contact_details["phone_number"],
# np.nan,
# contact_details["secondary_phone_number"]
# )
# contact_details = contact_details[
# ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
# "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
# columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
# )
# contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
# # Format the phone numbers
#
# contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
# normalize_uk_phone)
# contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
# str).apply(
# normalize_uk_phone)
#
# # Add in the Hubspot deal data
# hubspot_data = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
# "property-status.csv",
# encoding="utf-8-sig"
# )
# # Merge on contact details
# contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
# contact_details,
# how="left",
# right_on="landlord_proprty_id",
# left_on="Landlord Property ID"
# )
#
# contact_details = contact_details.drop(columns=["landlord_proprty_id"])
#
# # Store as csv
# contact_details.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
# "Programme Hubspot Upload/Hubspot/"
# "contact_details.csv",
# index=False, encoding="utf-8-sig"
# )

View file

@ -3,7 +3,7 @@ import numpy as np
# The set of standardised built form labels
STANDARD_BUILT_FORMS = {
    "unknown",
    # Houses
    "end-terrace",
    "semi-detached",
    "detached",
    "mid-terrace",
    "enclosed mid-terrace",
    "enclosed end-terrace",
    # Flats
    "ground floor",
    "mid-floor",
    "top-floor",
    "basement",
    "low rise",
    "high rise",
}
@ -331,4 +331,46 @@ BUILT_FORM_MAPPINGS = {
'Low Rise': 'low rise',
'Upper Floor': 'top-floor',
'High Rise': 'high rise',
'2012 ONWARDS DETACHED': 'detached',
'1950-66 END TERRACE': 'end-terrace',
'1976-82 MID TERRACED': 'mid-terrace',
'1950-66 MID TERRACE': 'mid-terrace',
'1991-95 DETACHED': 'detached',
'1976-82 END TERRACED': 'end-terrace',
'1967-75 DETACHED': 'detached',
'PRE 1900 DETACHED': 'detached',
'PRE 1900 MID TERRACE': 'mid-terrace',
'1900 DET': 'detached',
'1967-75 MID TERR': 'mid-terrace',
'1930-49 SEMI DET': 'semi-detached',
'1900-29 SEMI DET': 'semi-detached',
'1900-29 MID TERR': 'mid-terrace',
'1983- 90 MID TERR': 'mid-terrace',
'1976-82 MID TERR': 'mid-terrace',
'1983-90 END TERR': 'end-terrace',
'1991-95 SEMI DET': 'semi-detached',
'1983-90 SEMI DET': 'semi-detached',
'1991-95 MID TERR': 'mid-terrace',
'1950-66 SEMI DET': 'semi-detached',
'1900 MID TERR': 'mid-terrace',
'1967-75 SEMI DET': 'semi-detached',
'1983- 90 SEMI DET': 'semi-detached',
'1983-90 MID TERR': 'mid-terrace',
'1976-82 SEMI DET': 'semi-detached',
'PRE 1900 MID TERR': 'mid-terrace',
None: 'unknown',
'SEMI-DETACHED': 'semi-detached',
'DETACHED': 'detached',
'MID TERRACE': 'mid-terrace',
'END TERRACE': 'end-terrace',
'ENCLOSED MID': 'enclosed mid-terrace',
'BUILDING': 'unknown',
'FLAT COMMUNAL FACILITIES': 'unknown',
'MAISONETTE': 'unknown',
'HOUSE': 'unknown',
'FLAT': 'unknown',
'BLOCK': 'unknown'
}

View file

@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = {
'PV: 25% roof area, PV: 3.6kWp array': 'already has PV',
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
'PV: 50% roof area': 'already has PV',
'Solar PV': 'already has PV'
'Solar PV': 'already has PV',
'SOLAR PV': 'already has PV'
}

View file

@ -27,7 +27,8 @@ STANDARD_HEATING_SYSTEMS = {
"electric ceiling",
"electric underfloor",
"no heating",
"non-electric underfloor"
"non-electric underfloor",
"warm air heating",
}
HEATING_MAPPINGS = {
@ -292,4 +293,76 @@ HEATING_MAPPINGS = {
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
'Storage heating': 'electric storage heaters',
'Storage heating (HHRSH)': 'high heat retention storage heaters',
'ELECTRIC BOILER': 'electric boiler',
'STORAGE HEATERS': 'electric storage heaters',
'GREENSTAR 24I JUNIOR': 'gas combi boiler',
'generic cond combi post98': 'gas condensing combi',
'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler',
'ECO TEC PRO 28 H COMBI A': 'gas combi boiler',
'GREENSTAR 25I ErP': 'gas combi boiler',
'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler',
'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler',
'IDEAL LOGIC HEAT 30': 'gas boiler, radiators',
'WORCESTER 240': 'gas boiler, radiators',
'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler',
'ECO TEC PRO 28 (OLD)': 'gas combi boiler',
'LOGIC COMBI2 C30': 'gas combi boiler',
'GREENSTAR 28I JUNIOR': 'gas combi boiler',
'WORCESTER 24i': 'gas combi boiler',
'GREENSTAR 30I ErP': 'gas combi boiler',
'25 CDI': 'gas combi boiler',
'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler',
'GREENSTAR 24 RI': 'gas boiler, radiators',
'BAXI COMBI 105 HE': 'gas combi boiler',
'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler',
'WORCESTER 28 SI ll RSF': 'gas combi boiler',
'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler',
'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler',
'WORCESTER 24 SI ll RSF': 'gas combi boiler',
'GREENSTAR 4000': 'gas combi boiler',
'GREENSTAR 24i JUNIOR': 'gas combi boiler',
'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
'GREENSTAR 30SI COMPACT': 'gas combi boiler',
'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
'Not applicable for this asset type': 'unknown',
'Boiler: F rated Regular Boiler': 'gas condensing boiler',
'Warm Air Systems: Electric warm air: Electricaire system': 'warm air heating',
'Boiler: B rated Combi': 'gas condensing combi',
'Boiler: G rated Regular Boiler': 'gas condensing boiler',
'Electric Storage Systems: Modern (slimline) storage heaters': 'electric storage heaters',
'Boiler: C rated CPSU': 'gas condensing combi',
'Boiler: D rated Regular Boiler': 'gas condensing boiler',
'Warm Air Systems: Gas fired warm air with balanced or open flue: Ducted or stub-ducted, on-off control, '
'pre 1998': 'warm air heating',
'Electric Storage Systems: Integrated storage+direct-acting heater': 'electric storage heaters',
'Boiler: D rated Combi': 'gas condensing combi',
'Heat Pump: (from database)': 'air source heat pump',
'Community Heating Systems: Community CHP and boilers (RdSAP)': 'communal heating',
'': 'unknown',
'Solid Fuel Boiler': 'solid fuel',
'Heating (Other)': 'other',
'Solid Fuel Fire Only': 'solid fuel',
'No Main Heat Source': 'no heating',
'Electric Programmable': 'electric storage heaters',
'Linked to Communal Boiler': 'communal heating',
'Bio Mass Boiler': 'solid fuel',
'Electric Non Programmable': 'electric storage heaters',
'Room heaters, Mains gas': 'room heaters',
'Boiler, Solid fuel': 'solid fuel',
'Room heaters, Electricity': 'room heaters',
'Room heaters, Solid fuel': 'room heaters',
'Boiler, Oil': 'oil boiler',
'Boiler, Biomass': 'boiler - other fuel',
'Community heating, Community (non-gas)': 'communal heating',
'Heat pump (wet), Electricity': 'air source heat pump',
'Community heating, Community (mains gas)': 'communal gas boiler',
'Boiler, Electricity': 'electric boiler',
'Boiler, LPG': 'gas boiler, radiators',
'Boiler, Mains gas': 'gas boiler, radiators',
'Storage heating, Electricity': 'electric storage heaters'
}

View file

@ -252,5 +252,24 @@ PROPERTY_MAPPING = {
'Bedsit bungalow semi detached': 'bedsit',
'Bedsit Flat': 'bedsit',
'Semi detached house': 'house',
'Unit': 'unknown'
'Unit': 'unknown',
'HOUSE (3 STOREY)': 'house',
'FLAT GROUND FLOOR': 'flat',
'FLAT TOP FLOOR': 'flat',
'SHARED HOUSE': 'house',
'MAISONETTE': 'maisonette',
'DIRECT ACCESS HOSTEL': 'other',
'Day centre': 'other',
'Care home': 'other',
'BLOCK (Communal)': 'block of flats',
'SHOP': 'other',
'Office Block': 'other',
'BLOCK (Non-Communal)': 'block of flats',
'Refuge': 'other',
None: 'unknown',
'HFOP FLAT': 'flat',
'HFOP BEDSIT': 'bedsit',
'LINKED FLAT': 'flat',
'LINKED BUNGALOW': 'bungalow'
}

View file

@ -6,9 +6,10 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched unknown access to loft",
"piched unknown insulation",
"pitched insulated",
"pitched less than 100mm insulation"
"pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"flat insulated",
"unknown insulated",
"unknown",
}
@ -38,4 +39,140 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'200mm': 'pitched insulated',
'0-49mm': 'pitched less than 100mm insulation',
'50mm': 'pitched less than 100mm insulation',
'': 'unknown',
'NR': 'unknown',
'Non-joist': 'unknown',
'25mm': 'pitched less than 100mm insulation',
'400mm+': 'pitched insulated',
'12mm': 'pitched less than 100mm insulation',
'150MM': 'pitched insulated',
'200MM': 'pitched insulated',
'250MM': 'pitched insulated',
'100MM': 'pitched less than 100mm insulation',
'U/K': 'unknown',
'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 100mm': 'another dwelling above',
'PitchedNormalNoLoftAccess: 150mm': 'pitched insulated',
'PitchedNormalLoftAccess: As Built, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 200mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 200mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 50mm': 'unknown',
'PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 150mm': 'unknown', 'Flat: None': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
'Flat: Unknown, PitchedNormalLoftAccess: 200mm, SameDwellingAbove: Unknown': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: 250mm': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 300mm': 'another dwelling above',
'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: 50mm': 'pitched insulated',
'Flat: As Built, PitchedNormalNoLoftAccess: 100mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 150mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 200mm': 'pitched less than 100mm insulation',
'PitchedNormalNoLoftAccess: 75mm': 'pitched less than 100mm insulation',
'Flat: As Built, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, SameDwellingAbove': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 100mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 200mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 300mm': 'pitched insulated',
'Flat: As Built, PitchedNormalNoLoftAccess: 150mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'PitchedNormalNoLoftAccess: 200mm': 'pitched insulated',
'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: None': 'pitched less than 100mm insulation',
'Flat: As Built': 'flat unknown insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 250mm': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 50mm': 'another dwelling above',
'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched '
'insulated',
'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated',
'Flat: 50mm': 'flat unknown insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: None': 'another dwelling above',
'PitchedNormalNoLoftAccess: None': 'pitched uninsulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 25mm': 'another dwelling above',
'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above',
'Flat: As Built, PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'Flat: Unknown, PitchedNormalLoftAccess: 75mm, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: Unknown': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 300mm': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalLoftAccess: 150mm': 'another dwelling above',
'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'Flat: As Built, PitchedNormalLoftAccess: 300mm': 'unknown', 'Flat: 100mm': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 12mm': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 150mm': 'another dwelling above',
'PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 25mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None, PitchedNormalNoLoftAccess: Unknown': 'pitched '
'insulated',
'PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'Flat: As Built, PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: Unknown, SameDwellingAbove: Unknown': 'pitched no access to loft',
'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: As Built': 'pitched less than 100mm insulation',
'PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 50mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'Flat: 100mm, Flat: As Built': 'flat unknown insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: No Insulation': 'another dwelling above',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 300mm': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 270mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 300mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated',
'Flat: As Built, PitchedNormalNoLoftAccess: 250mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalNoLoftAccess: 50mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 75mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched insulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 150mm': 'another dwelling above',
'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above',
'Flat: As Built, PitchedNormalNoLoftAccess: 200mm': 'flat unknown insulation',
'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: No Insulation':
'another dwelling above',
'Flat: As Built, PitchedNormalLoftAccess: 50mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 25mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown, PitchedThatched: 25mm': 'pitched insulated',
'Flat: 150mm+': 'flat insulated',
'Flat: Unknown, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: 250mm': 'pitched insulated',
'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 75mm': 'pitched insulated',
'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 200mm': 'another dwelling above',
'PitchedNormalNoLoftAccess: Unknown': 'pitched no access to loft',
'PitchedNormalLoftAccess: Unknown': 'pitched unknown insulation',
'AnotherDwellingAbove: Unknown': 'another dwelling above'
}

View file

@ -7,122 +7,163 @@ STANDARD_WALL_CONSTRUCTIONS = {
"uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation",
# Timber Frame
"timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame",
"system built", "granite or whinstone", "other",
"unknown", "sandstone or limestone",
# System
"system built unknown insulation", "insulated system built", "uninsulated system built",
# Granite or Whinstone
"granite or whinstone unknown insulation", "insulated granite or whinstone", "uninsulated granite or whinstone",
# Sandstone or Limestone
"sandstone or limestone unknown insulation", "insulated sandstone or limestone",
"uninsulated sandstone or limestone",
# Other
"other",
"cob",
"new build - average thermal transmittance",
}
WALL_CONSTRUCTION_MAPPINGS = {
"New Build - Average Thermal Transmittance": "new build - average thermal transmittance",
'Average thermal transmittance 0.25 W/m?K': 'unknown',
'Average thermal transmittance 0.25 W/m?K': 'new build - average thermal transmittance',
'Cavity wall, as built, insulated (assumed)': 'filled cavity',
'Average thermal transmittance 0.31 W/m?K': 'unknown',
'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown',
'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown',
'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown',
'Average thermal transmittance 0.16 W/m?K': 'unknown',
'Average thermal transmittance 0.27 W/m&#0178;K': 'unknown',
'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
'Average thermal transmittance 0.18 W/m?K': 'unknown',
'Granite or whin, with internal insulation': 'granite or whinstone',
"Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown',
'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
'Average thermal transmittance 0.33 W/m?K': 'unknown',
'Average thermal transmittance 0.30 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.28 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.25 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.21 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.20 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.29 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.16 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.27 W/m&#0178;K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.15 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.23 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.18 W/m?K': 'new build - average thermal transmittance',
'Granite or whin, with internal insulation': 'insulated granite or whinstone',
"Granite or whinstone, as built, insulated (assumed)": "uninsulated granite or whinstone",
'Average thermal transmittance 0.22 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.24 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.16 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.35 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.26 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.62 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.64 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.61 W/m?K': 'new build - average thermal transmittance',
'Sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
'Average thermal transmittance 0.33 W/m?K': 'new build - average thermal transmittance',
'Cavity wall,': "cavity unknown insulation",
'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown',
'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown',
'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown',
'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown',
'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown',
'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown',
'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
'Average thermal transmittance 0.29 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.32 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.19 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.27 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.22 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.38 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.26 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.27 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.18 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance = 0.27 W/m?K': 'new build - average thermal transmittance',
'Cavity wall, with external insulation': 'filled cavity',
'Average thermal transmittance 0.21 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.23 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.20 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.32 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.24 W/m-¦K': 'new build - average thermal transmittance',
'Cavity wall, with internal insulation': 'filled cavity',
'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
'Average thermal transmittance 0.17 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.28 W/m?K': 'new build - average thermal transmittance',
'new build - average thermal transmittance': 'new build - average thermal transmittance',
'average thermal transmittance 0.25 w/m?k': 'unknown',
'average thermal transmittance 0.25 w/m?k': 'new build - average thermal transmittance',
'cavity wall, as built, insulated (assumed)': 'filled cavity',
'average thermal transmittance 0.31 w/m?k': 'unknown',
'average thermal transmittance 0.31 w/m?k': 'new build - average thermal transmittance',
'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m&#0178;k': 'unknown',
'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
'average thermal transmittance 0.18 w/m?k': 'unknown',
'granite or whin, with internal insulation': 'granite or whinstone',
'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation",
'average thermal transmittance 0.30 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.28 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.25 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.21 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.20 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.29 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.16 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.27 w/m&#0178;k': 'new build - average thermal transmittance',
'average thermal transmittance 0.15 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.23 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.18 w/m?k': 'new build - average thermal transmittance',
'granite or whin, with internal insulation': 'insulated granite or whinstone',
'average thermal transmittance 0.22 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.24 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.16 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.35 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.26 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.62 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.64 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.61 w/m?k': 'new build - average thermal transmittance',
'sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
'average thermal transmittance 0.33 w/m?k': 'new build - average thermal transmittance',
'cavity wall,': "cavity unknown insulation",
'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
'average thermal transmittance 0.28 w/m?k': 'unknown',
'average thermal transmittance 0.29 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.32 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.19 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.27 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.22 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.38 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.26 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.27 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.18 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance = 0.27 w/m?k': 'new build - average thermal transmittance',
'cavity wall, with external insulation': 'filled cavity',
'average thermal transmittance 0.21 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.23 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.20 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.32 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.24 w/m-¦k': 'new build - average thermal transmittance',
'cavity wall, with internal insulation': 'filled cavity',
'average thermal transmittance 0.17 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.28 w/m?k': 'new build - average thermal transmittance',
'Cavity wall, filled cavity': 'filled cavity',
'Cavity wall, filled cavity and external insulation': 'filled cavity',
'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone',
'Granite or whinstone, as built, no insulation (assumed)': 'uninsulated granite or whinstone',
'Solid brick, as built, insulated (assumed)': 'insulated solid brick',
'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick',
'Solid brick, with external insulation': 'insulated solid brick',
'Solid brick, with internal insulation': 'insulated solid brick',
'System built, as built, insulated (assumed)': 'system built',
'System built, as built, no insulation (assumed)': 'system built',
'System built, with external insulation': 'system built',
'System built, with internal insulation': 'system built',
'Timber frame, as built, insulated (assumed)': 'timber frame',
'Timber frame, as built, no insulation (assumed)': 'timber frame',
'Timber frame, as built, partial insulation (assumed)': 'timber frame',
'Timber frame, with additional insulation': 'timber frame',
'System built, as built, insulated (assumed)': 'insulated system built',
'System built, as built, no insulation (assumed)': 'uninsulated system built',
'System built, with external insulation': 'insulated system built',
'System built, with internal insulation': 'insulated system built',
'Timber frame, as built, insulated (assumed)': 'insulated timber frame',
'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame',
'Timber frame, as built, partial insulation (assumed)': 'insulated timber frame',
'Timber frame, with additional insulation': 'insulated timber frame',
'CAVITY': 'cavity unknown insulation',
'COMB': 'unknown',
'NONE': 'unknown',
'NOTKNOWN': 'unknown',
'SOLID': 'solid brick unknown insulation',
np.nan: 'unknown',
'RENDER/TIMBER FRAME': 'timber frame',
'SYSTEM BUILT': 'system built',
'RENDER/TIMBER FRAME': 'timber frame unknown insulation',
'SYSTEM BUILT': 'system built unknown insulation',
'PCC PANELS': 'other',
'NOT APPLICABLE - FLAT': 'unknown',
'BRICK/TIMBER FRAME': 'timber frame',
'BRICK/TIMBER FRAME': 'timber frame unknown insulation',
'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
'STONE SOLID': 'sandstone or limestone',
'EXT CLADDING SYSTEM': 'system built',
'STONE SOLID': 'sandstone or limestone unknown insulation',
'EXT CLADDING SYSTEM': 'system built unknown insulation',
'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
'Cavity Filled cavity (with internal/external)': 'filled cavity',
'ND (inferred) Filled cavity': 'filled cavity',
'Cavity Filled cavity': 'filled cavity',
'Cavity Unknown insulation': 'cavity unknown insulation',
'Timber frame As-built': 'timber frame',
'System build Unknown insulation': 'system built',
'Timber frame As-built': 'uninsulated timber frame',
'System build Unknown insulation': 'system built unknown insulation',
'Cavity As-built': 'uninsulated cavity',
'System build External': 'system built',
'System build External': 'insulated system built',
'ND (inferred) ND (inferred)': 'unknown',
'Solid brick External': 'insulated solid brick',
'Cavity External': 'filled cavity',
'System build As-built': 'system built',
'System build As-built': 'uninsulated system built',
'Solid brick Internal': 'insulated solid brick',
'Cavity Internal': 'filled cavity',
'System build Internal': 'system built',
'Solid brick As-built': 'solid brick unknown insulation',
'System build Internal': 'insulated system built',
'Solid brick As-built': 'uninsulated solid brick',
'Cavity ': 'cavity unknown insulation',
'Solid brick ': 'solid brick unknown insulation',
'Timber frame Timber frame (good insulation)': 'insulated timber frame',
@ -141,88 +182,156 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Cavity: Unknown': 'cavity unknown insulation',
'Cavity: AsBuilt (Post 1995)': 'filled cavity',
'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation',
'SystemBuilt: AsBuilt': 'system built',
'TimberFrame: AsBuilt': "timber frame unknown insulation",
'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation',
'SystemBuilt: AsBuilt': 'uninsulated system built',
'TimberFrame: AsBuilt': "uninsulated timber frame",
'Cavity: AsBuilt (1983-1995)': 'filled cavity',
'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity',
'SolidBrick: AsBuilt': 'solid brick unknown insulation',
'SolidBrick: AsBuilt': 'uninsulated solid brick',
'Cavity: FilledCavity': 'filled cavity',
'SolidBrick: Internal': 'insulated solid brick',
'Cavity: External': 'filled cavity',
'Sandstone: Internal': 'sandstone or limestone',
'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation',
'System build': 'system built',
'Sandstone: Internal': 'insulated sandstone or limestone',
'Cavity: AsBuilt (Pre 1976)': 'uninsulated cavity',
'System build': 'system built unknown insulation',
'Solid brick': 'solid brick unknown insulation',
'Stone': 'sandstone or limestone',
'Stone': 'sandstone or limestone unknown insulation',
'Timber frame': 'timber frame unknown insulation',
'2017 onwards': 'new build - average thermal transmittance',
'ND (inferred)': 'unknown',
'Flat / maisonette': 'other',
'Other': 'other',
'Flat / maisonette': 'unknown',
'Other': 'unknown',
'Timber Frame': 'timber frame unknown insulation',
'Cavity Wall': 'cavity unknown insulation',
'Non-Traditional': 'system built',
'PRC': 'system built',
'Cross Wall': 'system built',
'Non-Traditional': 'system built unknown insulation',
'PRC': 'system built unknown insulation',
'Cross Wall': 'system built unknown insulation',
'Solid Wall': 'solid brick unknown insulation',
'Traditional': 'unknown',
'Solid': 'solid brick unknown insulation',
'Wates no fines': 'system built',
'Concrete Frame': 'system built',
'PRCWATES': 'system built',
'Refurbished Cornish': 'system built',
'Wates no fines': 'system built unknown insulation',
'Concrete Frame': 'system built unknown insulation',
'PRCWATES': 'system built unknown insulation',
'Refurbished Cornish': 'system built unknown insulation',
'Bailey Stratton': 'other',
'Refurbished Reema': 'system built',
'PRCREEMA': 'system built',
'Trustsell Type': 'system built',
'Refurbished Reema': 'system built unknown insulation',
'PRCREEMA': 'system built unknown insulation',
'Trustsell Type': 'system built unknown insulation',
'Petra Nissan': 'unknown',
'Reinstated Airey': 'system built',
'Refurbished Airey': 'system built',
'Reinstated Airey': 'system built unknown insulation',
'Refurbished Airey': 'system built unknown insulation',
# From Abri- slightly unclear on types but not a large portion of the data
'No Fines Type': 'system built',
'Refurbished Unity': 'system built',
'No Fines Type': 'system built unknown insulation',
'Refurbished Unity': 'system built unknown insulation',
'Timber Framed': 'timber frame unknown insulation',
'Refurbished Woolaway': 'system built',
'Refurbished Woolaway': 'system built unknown insulation',
'Modern Methods of Construction': 'other',
'BISF - Brit Iron & Steel Federation': 'system built',
'Steel Framed': 'system built',
'BISF - Brit Iron & Steel Federation': 'system built unknown insulation',
'Steel Framed': 'system built unknown insulation',
'Timber Framed with confirmed Fire Stopping': 'timber frame unknown insulation',
'Sipporex': 'system built',
'Sipporex': 'system built unknown insulation',
'Wates': 'system built',
'Bryants': 'system built',
'Gregory (Crosswall)': 'system built',
'Rsmit': 'system built',
'Dorman Long': 'system built',
'Tarmac': 'system built',
'RBIS': 'system built',
'Five Oaks': 'system built',
'Wates': 'system built unknown insulation',
'Bryants': 'system built unknown insulation',
'Gregory (Crosswall)': 'system built unknown insulation',
'Rsmit': 'system built unknown insulation',
'Dorman Long': 'system built unknown insulation',
'Tarmac': 'system built unknown insulation',
'RBIS': 'system built unknown insulation',
'Five Oaks': 'system built unknown insulation',
'Not known': 'unknown',
'Smiths': 'system built',
'Kendrick': 'system built',
'IDC': 'system built',
'Wimpey (Part Brick)': 'system built',
'Whitehall': 'system built',
'Wimpey': 'system built',
'Bison': 'system built',
'Zinns': 'system built',
'Bisf': 'system built',
'Integer': 'system built',
'Cornish': 'system built',
'Rwate': 'system built',
'Hill Presweld Steel': 'system built',
'Smiths': 'system built unknown insulation',
'Kendrick': 'system built unknown insulation',
'IDC': 'system built unknown insulation',
'Wimpey (Part Brick)': 'system built unknown insulation',
'Whitehall': 'system built unknown insulation',
'Wimpey': 'system built unknown insulation',
'Bison': 'system built unknown insulation',
'Zinns': 'system built unknown insulation',
'Bisf': 'system built unknown insulation',
'Integer': 'system built unknown insulation',
'Cornish': 'system built unknown insulation',
'Rwate': 'system built unknown insulation',
'Hill Presweld Steel': 'system built unknown insulation',
'Cavity Filled Cavity': 'filled cavity',
'Cavity Unknown': 'cavity unknown insulation',
'Cavity Filled Cavity (internal)': 'filled cavity',
'': 'unknown',
'Cavity Internal Insulation': 'filled cavity',
'Cavity As Built': "uninsulated cavity",
'Non Trad Large Panel System': 'system built',
'Non Trad Cornish': 'system built',
'Non Trad Reema': 'system built',
'Non Trad Large Panel System': 'system built unknown insulation',
'Non Trad Cornish': 'system built unknown insulation',
'Non Trad Reema': 'system built unknown insulation',
'Traditional Cavity Brickwork': 'cavity unknown insulation',
'System build (undefined)': 'system built',
'Non Trad Wimpey': 'system built',
'Non Trad Wates': 'system built'
'System build (undefined)': 'system built unknown insulation',
'Non Trad Wimpey': 'system built unknown insulation',
'Non Trad Wates': 'system built unknown insulation',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 250MM': 'filled cavity',
'CAVITY FILLED 260MM': 'filled cavity',
'CAVITY FILLED 260MM': 'filled cavity',
'SOLID A/B 220MM': 'solid brick unknown insulation',
'CAVITY A/B 300MM': "uninsulated cavity",
'CAVITY A/B 250MM': "uninsulated cavity",
'CAVITY A/B 260MM': "uninsulated cavity",
'CAVITY A/B 270MM': "uninsulated cavity",
'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
'CAVITY EWI': 'filled cavity',
'SANDSTONE/CAVITY EXT': 'sandstone or limestone unknown insulation',
'SYSTEM BUILD 100MM EWI': 'insulated system built',
'CAVITY A/B 260MM': "uninsulated cavity",
'CAVITY A/B 270MM': "uninsulated cavity",
'CAVITY A/B 250MM': "uninsulated cavity",
'System': 'system built unknown insulation',
'Sandstone/Limestone': 'sandstone or limestone unknown insulation',
'No Fines': 'system built unknown insulation',
'Granite/Whinstone': 'granite or whinstone unknown insulation',
'Not applicable to this asset type': 'unknown',
'Steel Frame': 'system built unknown insulation',
'Solid Wall As Built': 'uninsulated solid brick',
'Solid As Built': 'uninsulated solid brick',
'Cavity: FilledCavity, Cavity: Unknown': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), TimberFrame: Unknown': 'uninsulated cavity',
'SolidBrick: AsBuilt, SolidBrick: Unknown': 'uninsulated solid brick',
'Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), SolidBrick: Unknown': 'uninsulated cavity',
'Cavity: FilledCavity, TimberFrame: Unknown': 'filled cavity',
'Cavity: AsBuilt (1976-1982), Cavity: Unknown': 'uninsulated cavity',
'Cavity: Unknown, SolidBrick: AsBuilt': 'cavity unknown insulation',
'Cavity: AsBuilt (1976-1982), Cavity: FilledCavity': 'filled cavity',
'Cavity: External, Cavity: FilledCavity': 'filled cavity',
'Cavity: AsBuilt (Post 1995), TimberFrame: AsBuilt': 'filled cavity',
'TimberFrame: AsBuilt, TimberFrame: Internal': 'timber frame unknown insulation',
'GraniteOrWhinstone: AsBuilt': 'uninsulated granite or whinstone',
'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), Cavity: FilledCavity': 'filled cavity',
'SolidBrick: AsBuilt, SolidBrick: External': 'insulated solid brick',
'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity': 'filled cavity',
'Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity',
'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), SolidBrick: AsBuilt': 'uninsulated cavity',
'Cavity: AsBuilt (1976-1982), SolidBrick: AsBuilt': 'filled cavity',
'Cavity: FilledCavity, SolidBrick: AsBuilt': 'filled cavity',
'SolidBrick: External': 'insulated solid brick',
'Cavity: FilledCavity, Cavity: Internal': 'filled cavity',
'Cavity: External, SolidBrick: AsBuilt': 'filled cavity',
'SolidBrick: AsBuilt, TimberFrame: AsBuilt': 'uninsulated solid brick',
'Cavity: FilledCavity, SystemBuilt: AsBuilt': 'filled cavity',
'Cavity: AsBuilt (1976-1982), SystemBuilt: AsBuilt': 'system built',
'Cavity: AsBuilt (Post 1995), SolidBrick: AsBuilt': 'filled cavity',
'Cavity: AsBuilt (1983-1995), TimberFrame: AsBuilt': 'filled cavity',
'SystemBuilt: AsBuilt, TimberFrame: AsBuilt': 'uninsulated system built',
'TimberFrame: Internal': 'insulated timber frame',
'Cavity: Internal': 'filled cavity',
'SystemBuilt: External': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), SystemBuilt: AsBuilt': 'uninsulated cavity',
'SystemBuilt: Internal': 'insulated system built',
'Cavity: AsBuilt (1983-1995), SolidBrick: AsBuilt': 'solid brick unknown insulation',
'Cavity: AsBuilt (Pre 1976), TimberFrame: AsBuilt': 'timber frame unknown insulation',
'SolidBrick: AsBuilt, SolidBrick: Internal': 'uninsulated solid brick',
'Cavity: FilledCavity, TimberFrame: AsBuilt': 'filled cavity',
'Cavity: FilledCavity, SolidBrick: AsBuilt, SolidBrick: Internal': 'filled cavity',
'Cavity: Internal, SolidBrick: AsBuilt': 'filled cavity',
}

View file

@ -6,7 +6,10 @@ epc-api-python==1.0.2
thefuzz
boto3
openpyxl
openai
openai>=1.3.5
tiktoken
msgpack
beautifulsoup4
beautifulsoup4
pydantic>=1.10.7
typing-extensions>=4.5.0
requests>=2.28.2

View file

@ -1,5 +1,5 @@
import time
import numpy as np
import random
import pandas as pd
from backend.SearchEpc import SearchEpc
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
@ -37,7 +37,9 @@ def get_data(
"mid-terrace": "Mid-Terrace",
"end-terrace": "End-Terrace",
"semi-detached": "Semi-Detached",
"detached": "Detached"
"detached": "Detached",
"enclosed end-terrace": "End-Terrace",
"enclosed mid-terrace": "Mid-Terrace",
}
epc_data = []
@ -79,7 +81,13 @@ def get_data(
uprn=uprn
)
# Force the skipping of estimating the EPC
searcher.ordnance_survey_client.property_type = None
# We check if the property was split
if home.get("is_expended_block"):
searcher.ordnance_survey_client.property_type = "Flat"
searcher.property_type = "Flat"
searcher.set_strict_property_type_search()
else:
searcher.ordnance_survey_client.property_type = None
searcher.ordnance_survey_client.built_form = None
searcher.find_property(skip_os=True)
@ -95,7 +103,6 @@ def get_data(
else:
# Try splitting on space
add1 = full_address.split(" ")[0].strip()
else:
add1 = str(house_number)
searcher = SearchEpc(
@ -166,7 +173,7 @@ def get_data(
find_epc_data = {}
except Exception as e:
raise Exception(f"Error retrieving FindMyEPC data: {e}")
time.sleep(np.random.uniform(0.1, 1))
time.sleep(random.sample(range(50, 100), 1)[0] / 100)
epc = {
row_id_name: home[row_id_name],
@ -176,6 +183,11 @@ def get_data(
}
epc_data.append(epc)
if len(epc_data) % 50 == 0 and len(epc_data) > 0:
logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
time.sleep(10)
except Exception as e:
errors.append(home[row_id_name])
time.sleep(5)

View file

@ -5,7 +5,7 @@ from typing import List
from backend.app.plan.schemas import HousingType
class Funding:
class FundingOld:
"""
Given a property, this class identifies if the home is possibly eligible for funding under
the various funding schemes. It will also calculate the expected amount of funding available
@ -411,3 +411,190 @@ class Funding:
self.gbis()
# self.eco4()
self.whlg()
class Funding:
"""
New class to handle funding calculation
"""
def __init__(
    self,
    tenure: HousingType,
    social_cavity_abs_rate: float,
    social_solid_abs_rate: float,
    private_cavity_abs_rate: float,
    private_solid_abs_rate: float,
    project_scores_matrix,
    whlg_eligible_postcodes
):
    """
    Store the inputs needed for the funding calculation.

    :param tenure: tenure of the property
    :param social_cavity_abs_rate: ABS rate for social housing, cavity wall
    :param social_solid_abs_rate: ABS rate for social housing, solid wall
    :param private_cavity_abs_rate: ABS rate for private housing, cavity wall
    :param private_solid_abs_rate: ABS rate for private housing, solid wall
    :param project_scores_matrix: full project scores matrix, filtered by calculate_full_project_abs
    :param whlg_eligible_postcodes: postcodes eligible for WHLG
    """
    # Reference data
    self.project_scores_matrix = project_scores_matrix
    self.whlg_eligible_postcodes = whlg_eligible_postcodes

    # Property tenure and the ABS rates for each tenure / wall type combination
    self.tenure = tenure
    self.social_cavity_abs_rate = social_cavity_abs_rate
    self.social_solid_abs_rate = social_solid_abs_rate
    self.private_cavity_abs_rate = private_cavity_abs_rate
    self.private_solid_abs_rate = private_solid_abs_rate

    # Bands are unknown at construction time; check_funding fills them in
    self.starting_sap_band = None
    self.ending_sap_band = None
    self.floor_area_band = None
@staticmethod
def get_sap_band(sap_score_number):
bands = [
("High_A", 96, float("inf")),
("Low_A", 92, 96),
("High_B", 86, 92),
("Low_B", 81, 86),
("High_C", 74.5, 81),
("Low_C", 69, 74.5),
("High_D", 61.5, 69),
("Low_D", 55, 61.5),
("High_E", 46.5, 55),
("Low_E", 39, 46.5),
("High_F", 29.5, 39),
("Low_F", 21, 29.5),
("High_G", 10.5, 21),
("Low_G", 1, 10.5),
]
for band, lower, upper in bands:
if lower <= sap_score_number < upper:
return band
return None
@staticmethod
def get_floor_area_band(floor_area):
if floor_area <= 72:
return "0-72"
if floor_area <= 97:
return "73-97"
if floor_area <= 199:
return "98-199"
return "200"
@staticmethod
def eco4_prs_eligibility(
starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
):
"""
Handles the eligibility criteria for private rental properties under eco
:return:
"""
# Help to heat group
# 1) EPC E - G
# 2) Must receive one of SWI, FTCH, renewable heating or DHC
# 3) Tenant must be on benefits
# We don't consider the tenant being on benefits - we just notify the end user that this is a requirement
meets_epc = starting_sap <= 54
has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures
# We check if the property has a heating system that means solar pv counts as a renewable heating system
has_eligible_electric_heating = any(x in mainheat_description for x in [
"air source heat pump", "ground source heat pump", "boiler and radiators, electric"
]) | (("electric storage heaters" in mainheat_description) and
(heating_control_description.lower() == "controls for high heat retention storage heaters")
)
# Counts as renewable heating
solar_renweable_heating = has_eligible_electric_heating & ("solar_pv" in measures)
# Is a renewable heating
ashp = "air_source_heat_pump" in measures
if meets_epc & (solar_renweable_heating or ashp or has_solid_wall):
return True
return False
def calculate_full_project_abs(self):
    """
    Look up the full project score for the current floor area band, starting band and finishing band.

    Reads self.floor_area_band, self.starting_sap_band and self.ending_sap_band and filters
    self.project_scores_matrix on its "Floor Area Segment", "Starting Band" and "Finishing Band" columns.
    :return: the "Cost Savings" value of the first matching row
    :raises ValueError: when the matrix has no row for this combination of bands
    """
    # Filter the project scores matrix
    matrix = self.project_scores_matrix
    data = matrix[
        (matrix["Floor Area Segment"] == self.floor_area_band) &
        (matrix["Starting Band"] == self.starting_sap_band) &
        (matrix["Finishing Band"] == self.ending_sap_band)
    ]

    if data.empty:
        raise ValueError("Missing abs rate, check the project scores matrix")

    return data["Cost Savings"].values[0]
def check_funding(
    self, measures: List,
    starting_sap: int,
    ending_sap: int,
    floor_area: float,
    mainheat_description: str,
    heating_control_description: str,
    is_cavity: bool
):
    """
    Given a list of measures, this function will check if the package of measures is fundable

    As a side effect, stores the starting/ending SAP half bands and the floor area band on the instance.
    :param measures: measure type names in the package
    :param starting_sap: SAP score before works
    :param ending_sap: SAP score after works
    :param floor_area: total floor area of the property
    :param mainheat_description: main heating description, forwarded to the ECO4 eligibility check
    :param heating_control_description: heating controls description, forwarded to the ECO4 eligibility check
    :param is_cavity: when True the private cavity ABS rate is applied, otherwise the private solid rate
    :return: for Private tenure, the estimated ECO4 funding (full project score multiplied by the ABS rate),
        or None when the package is not ECO4 eligible
    :raises NotImplementedError: when the SAP movement is not supported, or the tenure is not yet handled
    """
    # A property starting at SAP 55 or above has to finish at EPC C (SAP 69) or better
    if starting_sap >= 55 and ending_sap < 69:
        raise NotImplementedError("This property doesn't have sufficient SAP movement")

    # `and` rather than `&`: the bitwise operator binds tighter than the comparisons, which turned the
    # original condition into a chained comparison against `38 & ending_sap`
    if starting_sap <= 38 and ending_sap <= 55:
        # F or G should get to D
        raise NotImplementedError("Implement F or G to D eligibility")

    self.starting_sap_band = self.get_sap_band(starting_sap)
    self.ending_sap_band = self.get_sap_band(ending_sap)
    self.floor_area_band = self.get_floor_area_band(floor_area)

    ########################
    # Private
    ########################
    # 1) ECO4
    # 2) GBIS
    if self.tenure == "Private":
        is_eco4_eligible = self.eco4_prs_eligibility(
            starting_sap=starting_sap,
            measures=measures,
            mainheat_description=mainheat_description,
            heating_control_description=heating_control_description
        )

        # GBIS still needs to be implemented:
        # 1) Package has to include an insulation measure
        # 2) We should use the funding for the measure that has the largest partial project score

        if not is_eco4_eligible:
            return None

        eco4_abs = self.calculate_full_project_abs()
        # We estimate rates now. The funding is the score multiplied by the rate for the wall type
        abs_rate = self.private_cavity_abs_rate if is_cavity else self.private_solid_abs_rate
        return eco4_abs * abs_rate

    ########################
    # Social
    ########################
    # 1) ECO4
    # 2) GBIS
    if self.tenure == "Social":
        raise NotImplementedError("Social housing funding has not been implemented yet")

    raise NotImplementedError("Only implemented for Private or Social housing")

View file

@ -217,6 +217,9 @@ class Property:
self.eco4_eligibility = None
self.whlg_eligibility = None
# Ventilation
self.has_ventilation = self.identify_ventilation()
@classmethod
def extract_kwargs(cls, kwargs):
"""
@ -1197,7 +1200,7 @@ class Property:
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] == "mains gas":
if self.main_fuel["fuel_type"] in ["mains gas", None]: # We assume when None as it's unknown
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
raise Exception("Implement me")
@ -1233,6 +1236,13 @@ class Property:
if "air_source_heat_pump" not in measures:
return False
# If we have a house over a floor area threshold, we recommend an ASHP
if (
self.data["property-type"] in ["House", "Bungalow"] and
self.floor_area > assumptions.ASHP_FLOOR_AREA_THRESHOLD
):
return True
suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [
"Detached", "Semi-Detached", "End-Terrace",
]
@ -1342,3 +1352,12 @@ class Property:
self.gbis_eligibiltiy = funding_calulator.gbis_eligibiltiy
self.eco4_eligibility = funding_calulator.eco4_eligibility
self.whlg_eligibility = funding_calulator.whlg_eligibility
def identify_ventilation(self):
    """
    Report whether the property's "mechanical-ventilation" entry describes a mechanical system.

    :return: True when self.data["mechanical-ventilation"] is exactly one of the recognised mechanical
        ventilation descriptions, otherwise False
    """
    mechanical_systems = (
        'mechanical, extract only',
        'mechanical, supply and extract',
    )
    current_ventilation = self.data["mechanical-ventilation"]
    return current_ventilation in mechanical_systems

View file

@ -160,6 +160,9 @@ class SearchEpc:
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
If you wish to run a strict property type search, please run set_strict_property_type_search()
:param address1: string, property's address line 1
:param postcode: string, property's postcode
:param full_address: string, optional parameter, the full address of the property
@ -189,6 +192,7 @@ class SearchEpc:
self.older_epcs = None
self.full_sap_epc = None
self.metadata = None
self.strict_property_type_search = False
# These are the address and postcode values, which we store in the database
self.address_clean = None
@ -199,6 +203,14 @@ class SearchEpc:
self.property_type = property_type
self.fast = fast
def set_strict_property_type_search(self):
    """
    Switch on strict property type searching for this instance.

    Once the flag is on, the search is restricted to results that match the specified property type.
    :return: None
    """
    self.strict_property_type_search = True
@staticmethod
def get_house_number(address: str, postcode=None) -> str | None:
"""
@ -315,6 +327,8 @@ class SearchEpc:
address_params["address"] = self.address1
if self.postcode:
address_params["postcode"] = self.postcode
if self.strict_property_type_search and self.property_type:
address_params["property-type"] = self.property_type.lower()
# We attempt the search with uprn params
@ -365,11 +379,16 @@ class SearchEpc:
unique_property_types = {r["property-type"] for r in rows}
is_just_a_house = (len(unique_property_types) == 1) & (
("House" in unique_property_types) | ("Bungalow" in unique_property_types)
)
# We allow for variation in property type across flats/maisonettes
# If we know that we have a flat/maisonette, we allow for both property types
if property_type in ["Flat", "Maisonette"]:
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"}):
# Make sure we have not JUST a house, or not JUST a flat/maisonette
if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"})):
return rows
if property_type is not None:
@ -424,6 +443,8 @@ class SearchEpc:
return rows
raise ValueError("property type and address cannot both be None, at least one must be provided")
@staticmethod
def format_address(newest_epc):
"""
@ -702,6 +723,18 @@ class SearchEpc:
exclude_old=exclude_old
)
# Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
# so we avoid comparing it to new builds
# TODO - this is experimental
newer_age_bands = [
"England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
"England and Wales: 2012 onwards"
]
if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum():
# At least one EPC falls outside the newer age bands, so we keep only those and drop the newer-band EPCs
epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy()
# If we have missing lodgment date, we fill it with inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set it to the mean of the non NA dates

View file

@ -58,6 +58,19 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
"Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1},
"Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85},
"From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85},
"Room heaters, coal": {"fuel": "Coal", "cop": 0.85},
"Electric underfloor heating, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
'Room heaters, electric, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85},
'Boiler and radiators, mains gas, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85},
'Room heaters, electric, Electric storage heaters': {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85},
"Boiler and radiators, anthracite": {"fuel": "Anthracite", "cop": 0.85},
'Electric immersion, off-peak, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
'Ground source heat pump, radiators, electric': {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
},
'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
"Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
@ -65,3 +78,6 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
measures_needing_ventilation = [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]
# If we have a property beyond this size, we assume it's likely large enough to have an ASHP
ASHP_FLOOR_AREA_THRESHOLD = 120 # m2

View file

@ -96,3 +96,7 @@ class PlanTriggerRequest(BaseModel):
# When performing a remote assessment, if this has been set, it will allow the engine to
# pull data from the find my epc website, to utilise as part of a remote assessment
event_type: Optional[Literal["remote_assessment"]] = None
# If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing
# scores to drop by a few points
simulate_sap_10: Optional[bool] = False

View file

@ -30,7 +30,6 @@ import backend.app.assumptions as assumptions
from backend.ml_models.api import ModelApi
from backend.Property import Property
from backend.Funding import Funding
from backend.apis.GoogleSolarApi import GoogleSolarApi
from recommendations.optimiser.CostOptimiser import CostOptimiser
@ -507,7 +506,7 @@ async def model_engine(body: PlanTriggerRequest):
)
# If we have a remote assessment data type, we pull the additional data and include it
if body.event_type == "remote_assessment":
if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")):
logger.info("Retrieving find my epc data")
try:
property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
@ -728,7 +727,8 @@ async def model_engine(body: PlanTriggerRequest):
# Additionally, if we have required measures, they should also be included. Therefore
# we can discount the number of points required to get to the target SAP band (or increase)
# in the case of ventilation
needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation)
needs_ventilation = any(
x in property_measure_types for x in assumptions.measures_needing_ventilation) and not p.has_ventilation
input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation)
@ -772,6 +772,10 @@ async def model_engine(body: PlanTriggerRequest):
epc_to_sap_lower_bound(body.goal_value) - current_sap_points
) - fixed_gain
if body.simulate_sap_10:
# We add 3 additional SAP points to the required gain to account for SAP 10
sap_gain += 3
if not body.optimise:
if body.goal != "Increasing EPC":
raise NotImplementedError("Only EPC optimisation is currently supported")
@ -826,7 +830,11 @@ async def model_engine(body: PlanTriggerRequest):
)
# If wall insulation is selected, we also include mechanical ventilation as a best practice measure
if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation):
ventilation_selected = [
r for r in solution if "+mechanical_ventilation" in r["type"]
]
if (any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation) or
len(ventilation_selected)):
ventilation_rec = next(
(r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
None

View file

@ -28,8 +28,8 @@ class AnnualBillSavings:
# Latest price cap figures from Ofgem are for April 2024
# https://www.ofgem.gov.uk/energy-price-cap
ELECTRICITY_PRICE_CAP = 0.2486
GAS_PRICE_CAP = 0.0634
ELECTRICITY_PRICE_CAP = 0.2573
GAS_PRICE_CAP = 0.0633
# This is the most recent export payment figure, at 9.28p/kWh
# Smart export guarantee rates can be found here:
# https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
@ -39,8 +39,8 @@ class AnnualBillSavings:
PRICE_FACTOR = 0.09549999999999999
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
DAILY_STANDARD_CHARGE_GAS = 0.3165
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097
DAILY_STANDARD_CHARGE_GAS = 0.2982
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137
# Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
# For July 2024. These quotes are based on the east midlands region, so we

View file

@ -0,0 +1,52 @@
import pytest
import pandas as pd
from utils.s3 import read_csv_from_s3
from backend.Funding import Funding
def get_funding_data():
    """
    Load the two funding reference datasets from the retrofit-data-dev bucket.

    :return: tuple of (ECO4 full project scores matrix, warm homes local grant eligible postcodes), both as
        DataFrames. The matrix columns are renamed and its "Cost Savings" column is cast to float
    """
    bucket = "retrofit-data-dev"

    scores = pd.DataFrame(
        read_csv_from_s3(bucket_name=bucket, filepath="funding/ECO4 Full Project Scores Matrix.csv")
    )
    scores.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
    scores["Cost Savings"] = scores["Cost Savings"].astype(float)

    postcodes = pd.DataFrame(
        read_csv_from_s3(bucket_name=bucket, filepath="funding/whlg eligible postcodes.csv")
    )

    return scores, postcodes
class TestFunding:
    """Exercises the Funding calculator against the reference datasets returned by get_funding_data."""

    def test_prs(self):
        """Run check_funding for a privately rented property receiving internal wall insulation and solar."""
        scores_matrix, eligible_postcodes = get_funding_data()

        calculator = Funding(
            project_scores_matrix=scores_matrix,
            whlg_eligible_postcodes=eligible_postcodes,
            social_cavity_abs_rate=13.5,
            social_solid_abs_rate=17,
            private_cavity_abs_rate=13.5,
            private_solid_abs_rate=17,
            tenure="Private",
        )

        # No assertion is made on the outcome; the test only checks that the call completes
        calculator.check_funding(
            measures=["internal_wall_insulation", "solar_pv"],
            starting_sap=54,
            ending_sap=69,
            floor_area=73,
            mainheat_description="Boiler and radiators, mains gas",
            heating_control_description="Programmer, room thermostat and TRVs",
            is_cavity=True
        )

View file

@ -0,0 +1,38 @@
"""
Brentwood sent us a new asset list in July 2025. This script will combine the data in the new asset list with the
old, so we have a single picture
"""
import pandas as pd
new_asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/All Assets "
"29.05.2025.xlsx",
sheet_name="Sheet1",
header=1
)
old_asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/BRENTWOOD Asset "
"list.xlsx",
sheet_name="Asset List"
)
# We combine based on the data we want
compiled = new_asset_list.merge(
old_asset_list[["UPRN", "Asset Type", "Year Built", "Dwelling", "Bedrooms", "Ownership", 'Asbestos Full Survey',
'Stock Condition Survey', 'Cat', 'Heating',
'WFT Findings', 'ECO Eligibility', 'CIGA Requested', 'CIGA Guarantee',
'ECO Survey completed']],
how="left",
on="UPRN"
)
compiled["WFT Findings"] = compiled["WFT Findings"].fillna("Not Inspected")
# Store this data
compiled.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/20250710 Asset List "
"Brentwood.xlsx",
index=False
)

View file

@ -0,0 +1,54 @@
import pandas as pd
comments_df = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
"Project/CBH_RetroTeamList_amended_25-06-05.xlsx",
)
cavity_route = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
"Project/20250708 Colchester Borough Homes- Standardised.xlsx",
sheet_name="July 2025 Route - Cavity"
)
solar_route = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
"Project/20250708 Colchester Borough Homes- Standardised.xlsx",
sheet_name="July 2025 Route - Solar"
)
# Merge on the comments
comments = comments_df[
["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)']
].copy()
cavity_route = cavity_route.merge(
comments, left_on="landlord_property_id", right_on="URPN", how="left"
)
solar_route = solar_route.merge(
comments, left_on="landlord_property_id", right_on="URPN", how="left"
)
# Get properties that are not on either route
not_on_routes = comments_df[
~comments_df["URPN"].isin(cavity_route["landlord_property_id"]) &
~comments_df["URPN"].isin(solar_route["landlord_property_id"])
]
# Store
not_on_routes.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
"Project/Properties not on routes.xlsx",
index=False
)
# Save the routes
cavity_route.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
"Project/Cavity Route.xlsx",
index=False
)
solar_route.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
"Project/Solar Route.xlsx",
index=False
)

View file

@ -0,0 +1,167 @@
import pandas as pd
def get_band(sap_score_number):
    """
    Convert a SAP score into its half band label (e.g. "Low_C").

    Band intervals include their lower bound and exclude their upper bound.
    :param sap_score_number: numeric SAP score
    :return: the matching label, or None if no band contains the score
    """
    half_bands = (
        ("High_A", 96, float("inf")),
        ("Low_A", 92, 96),
        ("High_B", 86, 92),
        ("Low_B", 81, 86),
        ("High_C", 74.5, 81),
        ("Low_C", 69, 74.5),
        ("High_D", 61.5, 69),
        ("Low_D", 55, 61.5),
        ("High_E", 46.5, 55),
        ("Low_E", 39, 46.5),
        ("High_F", 29.5, 39),
        ("Low_F", 21, 29.5),
        ("High_G", 10.5, 21),
        ("Low_G", 1, 10.5),
    )
    # The intervals are disjoint, so at most one label can match
    matching = [name for name, floor, ceiling in half_bands if floor <= sap_score_number < ceiling]
    return matching[0] if matching else None
def classify_floor_area(floor_area):
    """
    Bucket a floor area into one of the bands "0-72", "73-97", "98-199" or "200+".

    :param floor_area: numeric floor area
    :return: the label of the first band whose upper limit is not exceeded; "200+" when none applies
    """
    upper_limits = ((72, "0-72"), (97, "73-97"), (199, "98-199"))
    for limit, label in upper_limits:
        if floor_area <= limit:
            return label
    return "200+"
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
sheet_name="Standardised Asset List"
)
asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area)
# Objective:
# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
#
# Therefore:
# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
# qualify
# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
# archetypes
#
# Driving Factors:
# 1) Floor area band & starting SAP band - this will determine how much funding is produced
# 2) Heating system - this will determine if the property needs a heating upgrade or not
archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby(
["floor_area_band", "starting_sap_band", "landlord_heating_system"]
)["landlord_property_id"].nunique().reset_index()
archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"})
archetypes = archetypes.sort_values("n_properties", ascending=False)
archetypes["running_total"] = archetypes["n_properties"].cumsum()
archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100
archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel"
archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(
["boiler - other fuel", "electric storage heaters"]
)
archetypes = archetypes.reset_index(drop=True)
# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
electric_heated_archetypes = (
archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True)
)
electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
electric_heated_archetypes["cumulative_percentage"] = (
electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
)
# Assign the archetype id first. Everything below selects or copies the "archetype_id" column from
# `archetypes`, so it has to exist before the merge and before the oil subset is taken
archetypes["archetype_id"] = archetypes.index

# The main properties that need validation surveys are properties that require a heating upgrade
electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]]
electric_heated_archetypes = electric_heated_archetypes.merge(
    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
)

oil_archetypes = archetypes[
    archetypes["landlord_heating_system"] == "boiler - other fuel"
].copy().reset_index(drop=True)
asset_list = asset_list.merge(
archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
)
properties_for_verification = asset_list[
asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
].copy()
properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[
0].str.strip()
properties_for_verification["epc_age"] = (
pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"])
).dt.days
# We also survey 2 oil heater properties, so we take the 2 most prevalent archetypes
archetypes_for_survey = pd.concat(
[electric_heated_archetypes, oil_archetypes.head(2)]
)
# Take the property with the oldest EPC, by region. Prioritise estimated properties
sample = []
for _, config in archetypes_for_survey.iterrows():
properties = asset_list[
(asset_list["archetype_id"] == config["archetype_id"]) &
(asset_list["floor_area_band"] == config["floor_area_band"]) &
(asset_list["starting_sap_band"] == config["starting_sap_band"])
]
if pd.isnull(properties["epc_inspection_date"]).sum():
sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records")
else:
# Take the property with the oldest EPC
sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records")
sample.extend(sample_property)
sample = pd.DataFrame(sample)
sample = sample[
[
"landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band",
"floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id"
]
]
archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy()
archetypes["archetype_id"] = archetypes["archetype_id"].astype(str)
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
with pd.ExcelWriter(filename) as writer:
archetypes.to_excel(writer, sheet_name="Archetypes", index=False)
sample.to_excel(writer, sheet_name="Survey Sample", index=False)
# We store this
# Questions:
# 1) If futures are considering changing properties that have oil heating systems, we could include them and
# we have 39 total archetypes. Otherwise, we have 25 archetypes
# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're
# using
# Recommendations:
# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover
#

View file

@ -0,0 +1,144 @@
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
from backend.Funding import Funding
from backend.app.utils import sap_to_epc
from recommendations.recommendation_utils import estimate_external_wall_area
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
abs_matrix = pd.read_csv(
"/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
)
pps_matrix = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx",
header=1
)
pps_matrix.columns = [c.strip() for c in pps_matrix.columns]
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties - Standardised_2.xlsx",
sheet_name="Standardised Asset List"
)
asset_list = asset_list.rename(
columns={"domna_address_1": "address", "domna_postcode": "postcode"}
)
asset_list["address"] = asset_list["address"].astype(str)
# Pull the find my EPC data and get the SAP points for solid wall
asset_list_epc_client = AssetListEpcData(
asset_list=asset_list,
epc_auth_token=EPC_AUTH_TOKEN
)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
# We pull out solid wall insulation
solid_wall_sap_points = []
for r in asset_list_epc_client.non_invasive_recommendations:
solid_recommendations = [
x for x in r["recommendations"] if ("internal_wall_insulation" in x["type"]) or (
"external_wall_insulation" in x["type"]
)
]
if solid_recommendations:
solid_recommendations = solid_recommendations[0]
else:
continue
address = r["address"]
postcode = r["postcode"]
solid_wall_sap_points.append(
{
"address": address,
"postcode": postcode,
"sap_points": solid_recommendations["sap_points"]
}
)
solid_wall_sap_points = pd.DataFrame(solid_wall_sap_points)
avg_points = solid_wall_sap_points["sap_points"].median()
asset_list = asset_list.merge(solid_wall_sap_points, how="left", on=["address", "postcode"])
asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_points)
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))
asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))
asset_list["funding_scheme"] = np.where(
(
(asset_list["post_works_epc"] == asset_list["epc_rating_on_register"])
),
"GBIS",
"ECO4"
)
# Merge on the ABS matrix
asset_list = asset_list.merge(
abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
)
asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
# store for backup
# asset_list.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties -
# Standardised_2_with_funding.csv",
# index=False
# )
# For GBIS, we use the PPS
# Almost all properties are gas
# Using IWI solid 1.7 -> 0.3 rates
pps_matrix = pps_matrix[
pps_matrix["Measure_Type"].isin(["IWI_solid_1.7_0.3"])
]
# Merge on
asset_list = asset_list.merge(
pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
columns={
"Cost Savings": "partial_project_score",
"Starting Band": "starting_half_band",
"Total Floor Area Band": "floor_area_band"
}
),
how="left",
on=["starting_half_band", "floor_area_band"],
)
asset_list["partial_project_score"] = np.where(
asset_list["starting_half_band"].isin(["Low_C", "High_C"]),
None,
asset_list["partial_project_score"]
)
asset_list["funding_abs"] = np.where(
asset_list["funding_scheme"] == "GBIS",
asset_list["partial_project_score"],
asset_list["Cost Savings"]
)
asset_list["heat_loss_area"] = asset_list.apply(
lambda x: estimate_external_wall_area(
num_floors=x["attribute_est_number_floors"],
floor_height=(
float(x["epc_floor_height"]) if
not pd.isnull(x["epc_floor_height"]) else 2.5
),
perimeter=x["attribute_est_perimter"],
built_form=x["epc_archetype"]
),
axis=1
)
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/20250624 ACIS solid wall - standardised.xlsx"
with pd.ExcelWriter(filename) as writer:
asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)

View file

@ -0,0 +1,49 @@
# Get units for postcodes WF17 8RA, WF17 8RB
import os
import pandas as pd
from epc_api.client import EpcClient
from dotenv import load_dotenv
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
postcodes = [
"WF17 8RA",
"WF17 8RB",
]
client = EpcClient(auth_token=EPC_AUTH_TOKEN)
data = []
for postcode in postcodes:
resp = client.domestic.search(
params={"postcode": postcode, "address": None, "local-authority": None, "property-type": None,
"floor-area": None,
"energy-band": None, "from-month": None, "from-year": None, "to-month": None, "to-year": None,
'constituency': None},
size=1000
)
data.extend(resp["rows"])
df = pd.DataFrame(data)
# Get newest field by UPRN, inspection-date
df["inspection-date"] = pd.to_datetime(df["inspection-date"])
df = df.sort_values(by=["uprn", "inspection-date"], ascending=[True, False])
df = df.drop_duplicates(subset=["uprn"], keep="first")
df.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Blakeridge Mill/blakeridge_mill_epc_data.xlsx", index=False
)
df = df[df["address"] != "The Tower Blakeridge Mill, Upper Blakeridge Lane"]
df["walls-description"].value_counts()
df["roof-description"].value_counts()
df["total-floor-area"].astype(float).mean()
df["current-energy-efficiency"] = pd.to_numeric(df["current-energy-efficiency"], errors='coerce')
df.groupby("transaction-type")["current-energy-efficiency"].mean()
df["transaction-type"].value_counts()
df[df["transaction-type"] == "rental"]["built-form"].value_counts()

View file

@ -0,0 +1,289 @@
import pandas as pd
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
import numpy as np
contact_list = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
"PV address list - second wave KLD - PP.csv"
)
contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number(
address=str(x["Address 1: Street 1"]).strip(),
postcode=str(x["Postal Code"]).strip(),
), axis=1)
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - "
"Standardised (1).xlsx",
sheet_name="Standardised Asset List"
)
lookup = []
missed = []
# Addresses that cannot be matched automatically, mapped to their landlord_property_id. Keys are the raw
# street values exactly as they appear in the contact list, including any trailing space
manual_matches = {
    '1 The Beck': 40692,
    '3 The Beck ': 40693,
    '2 Orchard Close ': 7924,
    '3 Croxall Road': 40650,
    '4 Ward Road ': 33175,
}

for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)):
    street = x["Address 1: Street 1"]

    if street in manual_matches:
        lookup.append(
            {
                "UPRN": x["UPRN"],
                "landlord_property_id": manual_matches[street],
            }
        )
        continue

    street_clean = street.strip()
    full_address = asset_list["domna_full_address"].str.replace(",", "")
    postcode_match = asset_list["domna_postcode"].str.contains(x["Postal Code"].strip())

    # 1) The street appears somewhere in the full address, within the same postcode
    df = asset_list[full_address.str.contains(street_clean) & postcode_match]

    if df.shape[0] != 1:
        # 2) The full address equals the street exactly. Each comparison is parenthesised because `&` binds
        # tighter than `==`
        df = asset_list[(full_address == street_clean) & postcode_match]

    if df.shape[0] != 1:
        # 3) Fall back to matching on the house number within the postcode
        df = asset_list[
            (asset_list["domna_address_1"].astype(str) == str(x["house_no"])) &
            (postcode_match == True)
        ]

    if df.shape[0] != 1:
        # Still not exactly one candidate, so we leave it for manual matching
        missed.append(x["UPRN"])
        continue

    lookup.append(
        {
            "UPRN": x["UPRN"],
            "landlord_property_id": df["landlord_property_id"].values[0],
        }
    )
lookup = pd.DataFrame(lookup)
contact_list = contact_list.merge(lookup, how="left", on="UPRN")
# Store
contact_list.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
"PV address list - second wave KLD - PP with landlord_property_id.csv",
index=False
)
# I manually completed the lookup for the missed ones. We now read it back in and pull in the properties for the
# standardised asset list
contacts_complete = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
"PV address list - second wave KLD - PP with landlord_property_id.csv"
)
new_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet "
"Solar PV installs.xlsx",
sheet_name="Sheet1"
)
contact_list = contact_list.merge(
new_data,
how="left",
left_on="UPRN",
right_on="CE UPRN"
)
route = asset_list[
asset_list["landlord_property_id"].isin(contact_list["Legacy UPRN"].astype("Int64").astype(str))
].copy()
# Add the new heating data
contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str)
route2 = contact_list.merge(
route,
how="left",
right_on="landlord_property_id",
left_on="Legacy UPRN"
)
# Because I did a data pull, we can fill the other bits of information
missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"].astype(int))]
# Store both the route and missed
route2.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv",
index=False
)
# Add on phone number
contact_details_filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme "
"Hubspot Upload/Hubspot/Bromford - Solar PV address list - second wave KLD - PP with "
"landlord_property_id.xlsx")
contacts_filenames = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
"details/FAO Paul Contact Details-Table 1.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
"details/Green Contact Details-Table 1.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
"details/Main Contact Details-Table 1.csv",
]
merge_to = pd.read_excel(contact_details_filepath)
lookup = []
for fn in contacts_filenames:
df = pd.read_csv(fn, encoding="utf-8-sig")
# Merge on phone
details = df[
df["Property Reference Number (Main Address) (Property)"].isin(merge_to["UPRN"].astype(str))
][[
"Property Reference Number (Main Address) (Property)", "Landline", "Mobile Phone", "Email Address",
"First Name", "Last Name"
]]
lookup.append(details)
lookup = pd.concat(lookup)
# Drop entries where landline, mobile and email are all NaN
lookup = lookup.dropna(subset=["Landline", "Mobile Phone", "Email Address"], how="all")
lookup = lookup.drop_duplicates(["Landline", "Mobile Phone", "Email Address"])
# Sort so email is first, then landline, then mobile
lookup = lookup.sort_values(
["Property Reference Number (Main Address) (Property)", "Email Address", "Landline", "Mobile Phone"],
ascending=[True, True, True, True]
)
# Store
lookup.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/contact "
"details.csv",
index=False
)
lookup2 = []
for _, x in lookup.groupby("Property Reference Number (Main Address) (Property)"):
# If any entries have an email, we take those
if x["Email Address"].notna().any():
x = x[x["Email Address"].notna()]
# We then take the entry with a phone number
if x["Landline"].notna().any() or x["Mobile Phone"].notna().any():
x = x[x["Landline"].notna() | x["Mobile Phone"].notna()]
# Take the first entry
x = x.iloc[0]
lookup2.append(x)
lookup2 = pd.DataFrame(lookup2)
# Build a primary and (where available) secondary contact for every property in ``lookup``.
#
# An earlier draft of this section re-created ``lookup`` as an empty DataFrame (discarding the real contact data)
# and imported ``ace_tools``, which is only available inside a hosted notebook sandbox. Both have been removed so
# that the section runs against the real lookup built earlier in the script.


def _first_phone(contact):
    """Return the contact's mobile number if present, otherwise the landline, otherwise None."""
    # ``a or b`` cannot be used here: NaN is truthy, so a missing mobile would never fall back to the landline
    for field in ("Mobile Phone", "Landline"):
        if pd.notna(contact[field]):
            return contact[field]
    return None


results = []
for prop_id, group in lookup.groupby("Property Reference Number (Main Address) (Property)"):
    # Only rows with an email AND at least one phone number are usable. Copy so that adding the helper column
    # below does not write to a slice of ``lookup``.
    filtered = group[
        group["Email Address"].notna() &
        (group["Landline"].notna() | group["Mobile Phone"].notna())
    ].copy()
    if filtered.empty:
        continue

    # Prioritise rows with both phone numbers; a stable sort keeps the existing order between ties
    filtered["contact_score"] = (
        filtered["Landline"].notna().astype(int) +
        filtered["Mobile Phone"].notna().astype(int)
    )
    filtered = filtered.sort_values("contact_score", ascending=False, kind="stable")
    primary = filtered.iloc[0]

    # The secondary contact must not share the primary's number. The filter above guarantees that the primary
    # has a landline whenever it has no mobile.
    if pd.notna(primary["Mobile Phone"]):
        candidates = filtered[filtered["Mobile Phone"] != primary["Mobile Phone"]]
    else:
        candidates = filtered[filtered["Landline"] != primary["Landline"]]
    secondary = candidates.iloc[0] if not candidates.empty else None

    results.append({
        "Property ID": prop_id,
        "Primary Email": primary["Email Address"],
        "Primary Phone": _first_phone(primary),
        "Secondary Email": secondary["Email Address"] if secondary is not None else None,
        "Secondary Phone": _first_phone(secondary) if secondary is not None else None,
    })

final_df = pd.DataFrame(results)
# We set up primary and secondary phone numbers. We use mobile as the primary.
# Collapse ``lookup`` to a single row per property reference; this re-assigns ``lookup2``.
# NOTE(review): the intent is to prioritise, per ID, entries that have an email, but the sort below is only on
# the property reference and keep="last" takes whichever row ends up last for each property - confirm.
lookup2 = lookup.sort_values("Property Reference Number (Main Address) (Property)").drop_duplicates(
    "Property Reference Number (Main Address) (Property)", keep="last"
)
# TODO: Get into the standardised asset list format
# TODO: Add the deal postcode to Hubspot
# TODO: Upload the deal postcode

View file

@ -0,0 +1,24 @@
import pandas as pd
from backend.ml_models.Valuation import PropertyValuation
from backend.app.utils import sap_to_epc
# Load the survey export (sheet "Survey data")
surveys = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx",
    sheet_name="Survey data",
)


def _average_value_increase(survey_row):
    """Return the rounded ``average_increase`` estimated for one surveyed property.

    The pre and scenario-1 post SAP scores are converted with ``sap_to_epc`` and passed, together with the
    current valuation, to ``PropertyValuation.estimate_valuation_improvement``.
    """
    epc_before = sap_to_epc(survey_row["Pre SAP"])
    epc_after = sap_to_epc(survey_row["Scenario 1 Post SAP"])
    valuation_now = survey_row["Valuation"]
    estimate = PropertyValuation.estimate_valuation_improvement(
        valuation_now,
        epc_before,
        epc_after,
        total_cost=None
    )
    return round(estimate["average_increase"])


# One rounded uplift per survey row, in sheet order
increases = [_average_value_increase(survey_row) for _, survey_row in surveys.iterrows()]

View file

@ -0,0 +1,45 @@
import pandas as pd
# Rebuild the Ealing houses list: property features come from the FULL MAIN export, the inspection columns come
# from the HOUSES(UNCHECKED) export, and the two are joined on "Property ref".
houses_list = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(UNCHECKED).csv"
)
features = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing BC - HOUSES(IGNORE - FULL MAIN).csv"
)
# Remove the inspection columns (they are re-attached from houses_list below) and the two unnamed columns
features = features.drop(
    columns=[
        'Archetype', 'Construction', 'Insulated', 'Material',
        'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES',
        'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'Surveyors Name',
        'Unnamed: 30', 'Unnamed: 31'
    ]
)

# Loaded for reference; not used in the output below
demolitions = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing - Demolished or due to be.xlsx",
    sheet_name="Demolished or due to be"
)

# In this export the surveyor's name is held in the 'YET TO BE SURVEYED' column
inspections_data = houses_list[
    [
        "Property ref", "Postcode", 'Archetype', 'Construction', 'Insulated', 'Material',
        'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES',
        'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes', 'YET TO BE SURVEYED'
    ]
].rename(columns={"YET TO BE SURVEYED": "Surveyors Name"})

# The inspection columns were already removed from ``features`` above, and naming them in a second drop raises
# a KeyError. Only "Postcode" still needs removing so that the merge does not produce Postcode_x / Postcode_y.
asset_list = features.drop(columns=["Postcode"]).merge(
    inspections_data,
    how="inner",
    on="Property ref",
)

asset_list.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Ealing_rechecked_cleaned_05042025.csv",
                  index=False)

View file

@ -0,0 +1,75 @@
import numpy as np
import pandas as pd
from asset_list.hubspot.config import HubspotProcessStatus
# Completion tracker for the Ealing flats programme
project_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/Ealing Flats Completion Tracker JW "
    "170625.xlsx",
    sheet_name="All_Flats"
)

# Translate the tracker's (Status, PAS) pair into a HubSpot status; rows matching neither rule stay empty
project_data["hubspot_status"] = None
project_data["hubspot_status"] = np.where(
    (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2023"),
    HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
    project_data["hubspot_status"]
)
project_data["hubspot_status"] = np.where(
    (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2019"),
    "SURVEYED UNDER 2019 - NEEDS RE-SURVEY",
    project_data["hubspot_status"]
)
project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str)

standardised_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx"
)
asset_list = pd.read_excel(standardised_workbook, sheet_name="Standardised Asset List")

# Compare ids as strings on both sides of the join
# NOTE(review): astype(str) turns missing ids into the text "nan", so blank ids on both sides would match each
# other in the merge below - confirm neither column has blanks
asset_list["landlord_property_id"] = asset_list["landlord_property_id"].astype(str)
asset_list["incorrect_landlord_property_id"] = asset_list["incorrect_landlord_property_id"].astype(str)
project_data["Property ref"] = project_data["Property ref"].astype(str)

# We need to update the status of properties that have already been surveyed. Tracker columns that clash with
# asset list columns arrive with the "_project" suffix.
asset_list2 = asset_list.merge(
    project_data[["Property ref", "hubspot_status", "project_code"]],
    how="left",
    right_on="Property ref",
    left_on="incorrect_landlord_property_id",
    suffixes=("", "_project")
)
asset_list2["hubspot_status"] = np.where(
    ~pd.isna(asset_list2["hubspot_status_project"]),
    asset_list2["hubspot_status_project"],
    asset_list2["hubspot_status"]
)

# Project code priority: tracker value, then the asset list's existing value, then the landlord property id.
# The tracker value used to be merged in and then dropped without ever being applied.
asset_list2["project_code"] = (
    asset_list2["project_code_project"]
    .fillna(asset_list2["project_code"])
    .fillna(asset_list2["landlord_property_id"])
)
asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"])

# Default reason for properties that do not have one yet
asset_list2["cavity_reason"] = np.where(
    pd.isnull(asset_list2["cavity_reason"]),
    "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68",
    asset_list2["cavity_reason"]
)
asset_list2["solar_reason"] = None

# Read in block analysis and geographical areas from the standardised asset list; they are written back unchanged
block_analysis_df = pd.read_excel(standardised_workbook, sheet_name="Block Analysis")
geographical_areas = pd.read_excel(standardised_workbook, sheet_name="Geographical Areas")

# Write the updated standardised asset list alongside the two untouched sheets
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
            "programme.xlsx")
with pd.ExcelWriter(filename) as writer:
    asset_list2.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
    geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)

View file

@ -81,6 +81,7 @@ def app():
# We need to calculate the costs
cost_data = []
for _, row in epr_data.iterrows():
epc = row["EPC"][0]
sap = int(row["EPC"][1:])

View file

@ -0,0 +1,116 @@
#
import pandas as pd
# Workbooks used for the MHS programme refresh
_PROGRAMME_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
    "programme.xlsx"
)
_IN_FILL_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 ("
    "1).xlsx"
)

asset_list = pd.read_excel(_PROGRAMME_WORKBOOK, sheet_name="Standardised Asset List")
new_cavity_programme = pd.read_excel(_PROGRAMME_WORKBOOK, sheet_name="New Cavity Programme")
new_cavity_pilot = pd.read_excel(_PROGRAMME_WORKBOOK, sheet_name="Empty Cavity Pilot")
new_solar_programme = pd.read_excel(_PROGRAMME_WORKBOOK, sheet_name="New Solar Programme")

in_fill_properties_houses = pd.read_excel(_IN_FILL_WORKBOOK, sheet_name="Houses and Bungalows")
in_fill_properties_flats = pd.read_excel(_IN_FILL_WORKBOOK, sheet_name="Flats and Maistonettes")

# Q1) What are these properties? Do we have them on our list already?
# All of the houses are already in the asset list
in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin(
    asset_list["landlord_property_id"].values
)
# All of the flats are already in the asset list
in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin(
    asset_list["landlord_property_id"].values
)

# Q2) Which properties are excluded from the new programme?
in_fill_properties = pd.concat(
    [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False
)
# Attach the asset list data to each in-fill property
in_fill_properties = in_fill_properties.merge(
    asset_list,
    left_on="UPRN",
    right_on="landlord_property_id",
    how="left"
)

# How many properties are in the new programme? Flag membership of each new programme sheet.
in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin(
    new_cavity_programme["landlord_property_id"].values
)
in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin(
    new_solar_programme["landlord_property_id"].values
)
in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin(
    new_cavity_pilot["landlord_property_id"].values
)

not_in_new_programme = in_fill_properties[
    ~in_fill_properties["in_new_cavity_programme"]
    & ~in_fill_properties["in_new_solar_programme"]
    & ~in_fill_properties["in_new_cavity_pilot"]
].copy()

# Why? (these two counts are only visible when the script is run interactively)
not_in_new_programme["cavity_reason"].value_counts()
not_in_new_programme["solar_reason"].value_counts()

# Properties with neither a cavity nor a solar reason
no_reason_given = (
    pd.isnull(not_in_new_programme["cavity_reason"]) &
    pd.isnull(not_in_new_programme["solar_reason"])
)
not_identified_for_anything = not_in_new_programme[no_reason_given]

# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding
# the extraction
not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin(
    [
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more",
    ]
)

# Excluded = the rows with no reason at all. The flag used to be built by testing a Series against itself, which
# produced True/NaN through index alignment; the row mask gives the same rows as a proper boolean column.
not_in_new_programme["excluded"] = no_reason_given

not_in_new_programme[
    not_in_new_programme["funded_extractions"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv",
    index=False
)
not_in_new_programme[
    not_in_new_programme["excluded"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv",
    index=False
)

View file

@ -0,0 +1,14 @@
import pandas as pd
# Both sheets are read from the same standardised workbook
_MASTER_WORKBOOK = "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx"

cavity = pd.read_excel(_MASTER_WORKBOOK, sheet_name="Cavity Properties (for review)")
solar = pd.read_excel(_MASTER_WORKBOOK, sheet_name="Solar Properties")

# Standardised column name -> short name used for the cavity address list
_ADDRESS_COLUMNS = {"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
cavity_al = cavity[list(_ADDRESS_COLUMNS)].rename(columns=_ADDRESS_COLUMNS)

View file

@ -0,0 +1,199 @@
"""
This script is to calculate the ABS for the Places for People London project
"""
import os
import pandas as pd
# Folder holding the four reviewed regional workbooks
_ABS_RATES_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/"


def _load_route(workbook_name, sheet_name, location):
    """Read one route sheet (headers on the second row), rename "Route" to "Route March" and tag the location."""
    route = pd.read_excel(_ABS_RATES_DIR + workbook_name, sheet_name=sheet_name, header=1)
    route = route.rename(columns={"Route": "Route March"})
    route["location"] = location
    return route


# London
_LONDON_WORKBOOK = "PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx"
pfp_london_cav = _load_route(_LONDON_WORKBOOK, "Cav Route", "London")
pfp_london_pv = _load_route(_LONDON_WORKBOOK, "PV Route", "London")

# East
_EAST_WORKBOOK = "PFP_east_reviewed_standarised_15052025.xlsx"
pfp_east_cav = _load_route(_EAST_WORKBOOK, "Cav Route", "East")
pfp_east_pv = _load_route(_EAST_WORKBOOK, "PV Route", "East")

# North east
_NORTH_EAST_WORKBOOK = "PFP_north_east_reviewed_standardised_15052025.xlsx"
pfp_north_east_cav = _load_route(_NORTH_EAST_WORKBOOK, "Cav Route", "North East")
pfp_north_east_pv = _load_route(_NORTH_EAST_WORKBOOK, "PV Route", "North East")

# North West
_NORTH_WEST_WORKBOOK = "PFP_north_west_reviewed_standardised_15052025.xlsx"
pfp_north_west_cav = _load_route(_NORTH_WEST_WORKBOOK, "Cav Route", "North West")
pfp_north_west_pv = _load_route(_NORTH_WEST_WORKBOOK, "PV Route", "North West")

# Stack the regions into one cavity route and one solar route
cav_route = pd.concat([pfp_london_cav, pfp_east_cav, pfp_north_east_cav, pfp_north_west_cav])
solar_route = pd.concat([pfp_london_pv, pfp_east_pv, pfp_north_east_pv, pfp_north_west_pv])
def get_band(sap_score_number):
    """Map a SAP score to its band label (for example ``68`` -> ``"High_D"``).

    Every band covers ``lower <= score < upper``. Scores below 1, NaN and infinity fall in no band and
    give ``None``.
    """
    # Lower edges in ascending order; a band's upper edge is the next band's lower edge
    lower_edges = [1, 10.5, 21, 29.5, 39, 46.5, 55, 61.5, 69, 74.5, 81, 86, 92, 96]
    labels = [
        "Low_G", "High_G", "Low_F", "High_F", "Low_E", "High_E", "Low_D",
        "High_D", "Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A",
    ]
    upper_edges = lower_edges[1:] + [float("inf")]
    return next(
        (
            label
            for label, lower, upper in zip(labels, lower_edges, upper_edges)
            if lower <= sap_score_number < upper
        ),
        None,
    )
def classify_floor_area(floor_area):
    """Return the floor-area segment label ("0-72", "73-97", "98-199" or "200+") for a floor area.

    A missing floor area (``None`` or NaN) returns ``None``. NaN fails every ``<=`` comparison, so it would
    otherwise fall through to "200+" and be treated as the largest segment.
    """
    if pd.isna(floor_area):
        return None
    if floor_area <= 72:
        return "0-72"
    if floor_area <= 97:
        return "73-97"
    if floor_area <= 199:
        return "98-199"
    return "200+"
# Solar route: starting band from the registered SAP score, fixed finishing bands for the two scenarios, and a
# floor area band (missing floor areas are taken as 90)
solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band)
solar_route["ending_abs_band_scenario1"] = "High_C"
solar_route["ending_abs_band_scenario2"] = "Low_B"
solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(90)
solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area)

# Cavity route: missing SAP scores are taken as 68, and the finishing band is always Low_C
cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68)
cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band)
cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area)
cav_route["ending_abs_band"] = "Low_C"

abs_matrix = pd.read_csv(
    "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
)


def _attach_abs_rate(route, finishing_band_column):
    """Left-join the scores matrix on (starting band, finishing band, floor area band), as "ABS Rate"."""
    return route.merge(
        abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
        how="left",
        left_on=["starting_abs_band", finishing_band_column, "floor_area_band"],
        right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
    )


def _summarise_by_route(route):
    """Total the ABS rate and count the properties per "Route March", ordered by the week number in its name."""
    summary = route.groupby("Route March").agg(
        {
            "ABS Rate": "sum",
            "landlord_property_id": "count",
        }
    ).reset_index()
    summary["Week Number"] = summary["Route March"].str.extract(r"(\d+)").astype(int)
    summary = summary.sort_values("Week Number", ascending=True)
    return summary.rename(columns={"landlord_property_id": "Number of Properties"})


cav_route = _attach_abs_rate(cav_route, "ending_abs_band")
solar_route = _attach_abs_rate(solar_route, "ending_abs_band_scenario1")

# Rows without a match in the matrix contribute nothing
cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0)
solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0)

cav_abs_agg = _summarise_by_route(cav_route)
solar_abs_agg = _summarise_by_route(solar_route)

# Write one workbook: the two summaries first, then the row-level cavity and solar data
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx"
with pd.ExcelWriter(filename) as writer:
    solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False)
    cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False)
    cav_route.to_excel(writer, sheet_name="Cavity data", index=False)
    solar_route.to_excel(writer, sheet_name="Solar data", index=False)

View file

@ -0,0 +1,48 @@
"""
July 2025, this script prepares the asset list for Plus Dane
"""
import pandas as pd
# The three lists received from Plus Dane: the original asset list, the potential PV list and the newest list
oldest_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/PLUS DANE Asset List.xlsx"
)
solar_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane - potential "
    "PV List 04.03.2025.xlsx"
)
newest_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Sava Intelligent Energy "
    "- Property List - March 2025.xlsx"
)

# Properties on the older lists that are not on the newest list. ``old_missed`` is copied because a column is
# added to it below; assigning onto the filtered slice triggers pandas' SettingWithCopyWarning.
old_missed = oldest_asset_list[~oldest_asset_list["UPRN"].isin(newest_asset_list["UPRN"])].copy()
solar_missed = solar_asset_list[~solar_asset_list["UPRN"].isin(newest_asset_list["UPRN"])]  # Empty

# Build new asset list
# NEWEST
# 'UPRN', 'Address', 'Postcode', 'Town', 'EPC SAP Band', 'SAP Rating',
# 'CO₂ Emissions', 'EPC EI Band', 'Data Quality Indicator',
# 'Results Calculated', 'Property Age', 'Property Type', 'Built Form',
# 'Wall Construction', 'Wall Insulation', 'Roof Construction',
# 'Joist Insulation', 'Space Heating System', 'Space Heating Fuel'
#
# SOLAR
# The newest list is the base; clashing columns from the other two lists get the "_solar" / "_old" suffix.
# NOTE(review): a UPRN that appears more than once in the solar or oldest list will duplicate rows here - confirm
df = newest_asset_list.merge(
    solar_asset_list, how="left", on="UPRN", suffixes=("", "_solar"),
).merge(
    oldest_asset_list, how="left", on="UPRN", suffixes=("", "_old")
)

# NOTE(review): the column name is spelt "versiion"; kept as-is in case anything already reads the output
df["asset_list_versiion"] = "July 2025"
old_missed["asset_list_versiion"] = "Historic"

# Append on the old missed?
df = pd.concat(
    [df, old_missed], ignore_index=True, sort=False
)

# Store excel
df.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane Asset List "
    "July 2025.xlsx",
    index=False,
)

View file

@ -4,7 +4,7 @@ from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
PORTFOLIO_ID = 141
PORTFOLIO_ID = 212
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
@ -17,25 +17,15 @@ def app():
:return:
"""
asset_list = [
{
"address": "196 Merrow Street",
"postcode": "SE17 2NP",
"uprn": 200003423454,
"patch": True
},
{
"address": "65 Liverpool Grove",
"postcode": "SE17 2HP",
"uprn": 200003423194
},
{
"address": "2 Brettell Street",
"postcode": "SE17 2NZ",
"uprn": 200003423607
},
]
asset_list = pd.DataFrame(asset_list)
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx",
sheet_name="Solar Properties",
)
asset_list = asset_list[~asset_list["estimated"]]
asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
asset_list = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
)
# Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
@ -98,14 +88,15 @@ def app():
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "C",
"goal_value": "A",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": patches_filename,
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": valuation_filename,
"valuation_file_path": "",
"scenario_name": "Full package remote assessment",
"multi_plan": True,
"budget": None,
"inclusions": ["cavity_wall_insulation", "ventilation"]
}
print(body)

View file

@ -0,0 +1,40 @@
"""
This script will pull in properties, in neighbouring areas, that have been flagged for CWI
"""
import pandas as pd
# Every sheet below comes from the reconciled Thrive programme workbook
_RECONCILED_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
    "reconciled.xlsx"
)

asset_list = pd.read_excel(_RECONCILED_WORKBOOK, sheet_name="Standardised Asset List")
cavity_areas = pd.read_excel(_RECONCILED_WORKBOOK, sheet_name="Cavity Areas")
existing_inspections_sheet = pd.read_excel(_RECONCILED_WORKBOOK, sheet_name="July 2025 Inspections")
empties = pd.read_excel(_RECONCILED_WORKBOOK, sheet_name="Cavity properties - for review")

# Keep the properties whose postcode is listed on the "Cavity Areas" sheet ...
in_cavity_area = asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values)
cavity_inspections = asset_list[in_cavity_area]

# ... and remove those already on the "Cavity properties - for review" sheet
already_listed = cavity_inspections["landlord_property_id"].isin(empties["landlord_property_id"].values)
cavity_inspections = cavity_inspections[~already_listed]

cavity_inspections.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv",
    index=False
)

View file

@ -8,6 +8,8 @@ address the following concerns:
"""
import pandas as pd
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
# This is Thrive's list of properties and when they should have been surveyed
thrive_tracker = pd.read_excel(
@ -51,27 +53,10 @@ original_columns = {
}
original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
original_asset_list["Data Source"] = "Thrive Tracker"
original_asset_list["Data Source"] = "Original Asset List"
original_asset_list = original_asset_list.drop_duplicates()
# We append on the missed properties, with the information we have
# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
# 'Special Requirements ', 'CIGA', 'Date CIGA check received',
# 'Proposed Progamme', 'New Proposed Programme',
# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
# 'Date Submitted to installer', 'PRRN Number',
# 'Loft insulation required? (Thrive)', 'Date booked ',
# 'Completed\n(yes/no)', 'Date Completed',
# 'Vents installed?\n(number and location)',
# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
# 'PRRN Submitted '
missed_properties["Full Address"] = (
missed_properties["#"].astype(str) + ", " +
missed_properties["Adress Line 1"].astype(str) + ", " +
@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
missed_properties["ECO Eligibility"] = "Property Not Inspected"
missed_properties["Data Source"] = "Thrive Tracker"
# We de-dupe ids in original_asset_list
dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
dupes = original_asset_list[
original_asset_list["thrive_property_id"].isin(dupe_ids)
].copy()
dupes = dupes.sort_values("thrive_property_id")
original_asset_list = original_asset_list.rename(
columns={
"detailed_property_type": "build_form"
}
)
master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
# We were provided with a data update for a sample of properties. We update the data with this information
@ -103,12 +101,339 @@ data_update = pd.read_excel(
header=0
)
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
new_properties["Full Address"] = (
new_properties["#"].astype(str) + ", " +
new_properties["Adress Line 1"].astype(str) + ", " +
new_properties["Postcode"].astype(str)
)
new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
new_properties["WFT Findings"] = "Property Not Inspected"
new_properties["ECO Eligibility"] = "Property Not Inspected"
new_properties["Data Source"] = "13.05.2025 Data Update"
master_list = pd.concat([new_properties, master_list])
# We append any new data on heating system, heating type, and insulation type, based on the data update
master_list = master_list.merge(
data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
columns={
"Heating Type": "heating_type_updated",
"Assumed mm ": "assumed_loft_insulation_thickness_updated",
"SAP": "sap_rating_updated"
}
),
how="left",
left_on="thrive_property_id",
right_on="UPRN"
)
# We fill the missings
master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
master_list["assumed_loft_insulation_thickness_updated"] = master_list[
"assumed_loft_insulation_thickness_updated"
].fillna(master_list["assumed_loft_insulation_thickness"])
master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
thrive_tracker["UPRN"].astype(str).values
)
# Those the asset list - call it master asset list updated May2025
master_list = master_list.drop(columns=["UPRN"])
master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
# master_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
# "Complete - Updated May 2025.xlsx",
# )
master_list["house_number_TEMP"] = master_list.apply(
lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
axis=1
)
# We add in the status of the property
# TODO: Add the status of the property from the Thrive tracker
outcomes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
"24-March25.xlsx",
header=0
)
outcomes["row_id"] = outcomes.index
# We have two ids which have the same phone number but different UPRNs, so we don't match to the tracker for these
tracker_for_matching = thrive_tracker[
~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
].copy()
tracker_for_matching["Full Address"] = (
tracker_for_matching["#"].astype(str) + ", " +
tracker_for_matching["Adress Line 1"].astype(str) + ", " +
tracker_for_matching["Postcode"].astype(str)
)
outcomes_id_lookup = []
for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
hn = str(x["No."])
address = x["Address"]
postcode = x["Postcode"]
contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
contact_no = None if contact_no == "nan" else contact_no
if address == "292 Micklefield Road":
hn = "292"
if (address == "Micklefield Road") & (hn == "302"):
hn = "292"
if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
hn = "103a"
if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
hn = "105a"
if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
hn = "107a"
#
# # We match this to the tracker
# m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
# # Many of the phone numbers don't have a leading zero in the tracker so we add them
# if (m1.shape[0] != 1) and not pd.isnull(contact_no):
# m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
#
# if m1.shape[0] > 1:
# raise ValueError(
# f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
# )
# if m1.empty:
m1 = tracker_for_matching[
(tracker_for_matching["#"].astype(str) == hn) &
(tracker_for_matching["Postcode"] == postcode)
]
if m1.empty:
# Some properties aren't in the tracker, we match to the master list
m1 = master_list[
(master_list["house_number_TEMP"].astype(str) == hn) &
(master_list["postcode"] == postcode)
]
outcomes_id_lookup.append(
{
"row_id": x["row_id"],
"thrive_property_id": m1["thrive_property_id"].values[0],
"address": m1["full_address"].values[0],
"postcode": m1["postcode"].values[0],
}
)
continue
if m1.shape[0] != 1:
raise ValueError(
f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
)
# We add the status to the master list
outcomes_id_lookup.append(
{
"row_id": x["row_id"],
"thrive_property_id": m1["UPRN"].values[0],
"address": m1["Full Address"].values[0],
"postcode": m1["Postcode"].values[0],
}
)
outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
outcomes = outcomes.merge(
outcomes_id_lookup,
how="left",
left_on="row_id",
right_on="row_id"
)
outcomes = outcomes.drop(columns=["row_id"])
outcomes = outcomes.rename(
columns={
"Outcomes": "Outcome",
"Notes (If 'no "
"answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
}
)
# Store the corrected outcomes
# outcomes.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
# April 24-March25 - Corrected.xlsx",
# index=False
# )
# FIXME: the statement that stood here was committed half-written and is a SyntaxError that stops the whole
# script from being parsed:
#     data_update = = data_update[["UPRN", ""]]
# It is disabled until the second column to keep from data_update is known.
# TODO: Flag the Thrive priorities and create a separate project code for these
# TODO: Add the general project code
# TODO: Add the thrive
def parse_date(value):
    """
    Parse a "date letters sent" cell into a pandas Timestamp.

    Text values may carry a week-commencing prefix ('W.C' or 'w/c' in any case), which is removed before the
    remainder is parsed as a day-first date.

    :param value: the raw cell value - text, an already parsed date, or a missing value
    :return: the parsed Timestamp, or pd.NaT when the value is missing or cannot be parsed
    """
    if not isinstance(value, str):
        # Empty cells arrive as NaN/None and date-formatted cells as datetime objects; neither has .strip()
        if pd.isnull(value):
            return pd.NaT
        try:
            return pd.to_datetime(value, errors='coerce')
        except Exception:
            return pd.NaT

    # Strip any 'W.C' or 'w/c' prefix and clean whitespace
    cleaned = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
    try:
        # Try parsing the date with dayfirst=True
        return pd.to_datetime(cleaned, dayfirst=True, errors='coerce')
    except Exception:
        return pd.NaT
outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
# Next step - match the submissions master to the asset list. We will append on the UPRN
eco3_submissions = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
"ECO3.csv",
header=0
)
eco3_submissions["row_id"] = eco3_submissions.index
eco4_submissions = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
"ECO4.csv",
header=0
)
eco4_submissions["row_id"] = eco4_submissions.index
# List of properties never on the asset list
not_on_master = [
"7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
"20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
"26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
"9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
"12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
"20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
"25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
"33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
'37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
'41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
'46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
'50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
]
eco3_remap = {
"19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
"29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
"31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
"44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
"64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
"11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
"16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
"58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
"10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
"25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
"32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
"94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
'33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
'120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
'35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
'18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
'34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
'58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
'48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
'45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
'6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
'2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
'29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
'61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
'2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
}
def _build_submission_lookup(rows, house_number_col, master, skip_keys, remap):
    """
    Match submission rows to the master list on house number and postcode.

    Both submission sheets are matched with the same rules: the house number is compared as text and the postcode
    is compared ignoring case and surrounding whitespace.

    :param rows: iterable of (index, row) pairs, e.g. DataFrame.iterrows(), optionally wrapped in tqdm
    :param house_number_col: name of the house number column in the submission sheet
    :param master: master list holding the house_number_TEMP, postcode and thrive_property_id columns
    :param skip_keys: "number+street+postcode" keys of properties that were never on the asset list
    :param remap: maps a raw key onto the corrected (house number, street, postcode) to match with
    :return: list of dictionaries, one per matched submission row
    :raises ValueError: if a submission does not match exactly one record in the master list
    """
    # Normalise the master list once rather than on every iteration
    master_house_numbers = master["house_number_TEMP"].astype(str)
    master_postcodes = master["postcode"].str.strip().str.lower()

    lookup = []
    for _, row in rows:
        hn = row[house_number_col]
        pc = row["Post Code"]
        street = row["Street / Block Name"]
        # The key is built from the raw values, because that is what the skip list and the remap are keyed on
        key = f"{hn}+{street}+{pc}"
        if key in skip_keys:
            continue
        if key in remap:
            # The postcode (or house number) on the submission is different to the asset list
            hn, _street, pc = remap[key]
        # We filter the asset list, because it's hard to know how accurate this is
        m1 = master[
            (master_house_numbers == str(hn).strip()) &
            (master_postcodes == str(pc).strip().lower())
        ]
        if m1.shape[0] != 1:
            raise ValueError(
                f"Error for {key}: expected exactly one match in the master list, found {m1.shape[0]}"
            )
        lookup.append(
            {
                "row_id": row["row_id"],
                "thrive_property_id": m1["thrive_property_id"].values[0],
                "submission_house_number": row[house_number_col],
                "submission_address1": row["Street / Block Name"],
                "submission_postcode": row["Post Code"],
            }
        )
    return lookup


# A set gives constant-time membership checks inside the loops
not_on_master_keys = set(not_on_master)

# NOTE(review): the ECO4 sheet is corrected with eco3_remap as well, exactly as before - confirm this is intended
eco3_lookup = _build_submission_lookup(
    tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)),
    "NO ",
    master_list,
    not_on_master_keys,
    eco3_remap,
)
eco4_lookup = _build_submission_lookup(
    tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)),
    "NO.",
    master_list,
    not_on_master_keys,
    eco3_remap,
)

# We match the lookups back to the submission sheets
eco3_lookup = pd.DataFrame(eco3_lookup)
eco3_submissions = eco3_submissions.merge(
    eco3_lookup,
    how="left",
    on="row_id",
)
eco4_lookup = pd.DataFrame(eco4_lookup)
eco4_submissions = eco4_submissions.merge(
    eco4_lookup,
    how="left",
    on="row_id",
)

# Store
eco3_submissions.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
    "ECO3 - with IDS.csv",
    index=False
)
eco4_submissions.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
    "ECO4 - with IDS.csv",
    index=False
)

View file

@ -0,0 +1,130 @@
"""
This script takes the standardised asset list and appends the project codes.
We also review the existing install status, in case anything is wrong.
"""
import pandas as pd
import numpy as np
# All inputs and outputs live in the programme reconciliation folder
RECONCILIATION_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/"
STANDARDISED_WORKBOOK = RECONCILIATION_DIR + "Thrive Asset List - Complete - Updated May 2025 - Standardised.xlsx"
MASTER_TRACKER_WORKBOOK = RECONCILIATION_DIR + "Master Tracker (Thrive - Warmfront).xlsx"
# Tabs that are copied into the reconciled workbook without modification
PASSTHROUGH_SHEETS = ["Block Analysis", "Outcomes", "Unmatched Submissions", "Unmatched Ecosurv"]

# Open the standardised workbook once; a list of sheet names returns a dictionary of dataframes keyed by sheet name
standardised_sheets = pd.read_excel(
    STANDARDISED_WORKBOOK,
    sheet_name=["Standardised Asset List", *PASSTHROUGH_SHEETS],
)
# asset_list keeps the untouched sheet for the completeness check at the end of the script
asset_list = standardised_sheets["Standardised Asset List"]
standardised_asset_list = asset_list.copy()
block_analysis = standardised_sheets["Block Analysis"]
outcomes = standardised_sheets["Outcomes"]
unmatched_submissions = standardised_sheets["Unmatched Submissions"]
unmatched_ecosurv = standardised_sheets["Unmatched Ecosurv"]

# The master tracker is likewise read once and used for both the allocations and the completeness check
project_code_allocations = pd.read_excel(
    MASTER_TRACKER_WORKBOOK,
    sheet_name="Master Tracker",
    header=1
)
master_tracker = project_code_allocations

programme_codes = project_code_allocations[
    ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ]
].copy()
# The new proposed programme takes precedence; the original proposal is used where no new one was given
programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].fillna(
    programme_codes["Proposed Progamme"]
)

PROJECT_CODE_MAP = {
    'Phase 2': "THRIVE-002",
    'Phase 3': "THRIVE-003",
    'Phase 4': "THRIVE-004",
    'Phase 5': "THRIVE-005",
    'Phase 6': "THRIVE-006",
    'Phase 7': "THRIVE-007",
    'Phase 8': "THRIVE-008",
    'Phase 9': "THRIVE-009",
    'Phase 10': "THRIVE-010",
    "Week 1": "THRIVE-WEEK-001",
    "Week 2": "THRIVE-WEEK-002",
    "Week 4": "THRIVE-WEEK-004",
    "Week 7": "THRIVE-WEEK-007",
}
programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)

# Surface programme references the map does not know about; they would otherwise silently end up without a code
unmapped_references = programme_codes.loc[
    programme_codes["programme_reference"].notnull() & programme_codes["project_code"].isnull(),
    "programme_reference"
].unique()
if len(unmapped_references) > 0:
    print(f"Programme references without a project code: {sorted(str(ref) for ref in unmapped_references)}")

thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()

# NOTE(review): both merges assume one tracker row per UPRN - duplicate UPRNs would duplicate asset list rows
standardised_asset_list = standardised_asset_list.merge(
    programme_codes[["UPRN", "project_code", "programme_reference"]],
    how="left",
    left_on="landlord_property_id",
    right_on="UPRN",
).merge(
    thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
    how="left",
    on="UPRN",
)
standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"])

# We fill the project code for historical completions: properties with no allocated project code whose hubspot
# status is populated and is anything other than "READY TO BE SCHEDULED"
is_historical = (
    standardised_asset_list["project_code"].isnull() &
    standardised_asset_list["hubspot_status"].notnull() &
    (standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED")
)
standardised_asset_list.loc[is_historical, "project_code"] = "THRIVE-HISTORICAL"

# Store as an excel: the updated asset list first, followed by the tabs that are carried over unchanged
filename = RECONCILIATION_DIR + "Thrive Programme - reconciled.xlsx"
with pd.ExcelWriter(filename) as writer:
    standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    for sheet_name in PASSTHROUGH_SHEETS:
        standardised_sheets[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)

# A check, just comparing against the master tracker to make sure I have all of the installs
df = asset_list[["landlord_property_id", "hubspot_status"]].merge(
    master_tracker[master_tracker['Date Completed'].notnull()][["UPRN", "Date Completed"]],
    how="inner",
    left_on="landlord_property_id",
    right_on="UPRN"
)
# Printed explicitly: outside a notebook a bare expression produces no output
print(df["hubspot_status"].value_counts())
print(df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"])

View file

@ -1,3 +1,4 @@
import random
import time
import pandas as pd
from tqdm import tqdm
@ -27,6 +28,7 @@ class AssetListEpcData:
self.extracted_data = None
self.non_invasive_recommendations = None
self.patches = None
self.epc_data = None
@staticmethod
def check_asset_list(asset_list):
@ -49,7 +51,7 @@ class AssetListEpcData:
"uprn": r.get("uprn"),
"address": r["address"],
"postcode": r["postcode"],
"recommendations": r["recommendations"]
"recommendations": r.get("recommendations")
} for r in self.extracted_data
]
@ -74,7 +76,9 @@ class AssetListEpcData:
# Pull the additional data
extracted_data = []
epc_data = []
for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)):
add1 = home["address"]
pc = home["postcode"]
# Retrieve the EPC data
@ -92,9 +96,6 @@ class AssetListEpcData:
if epc_searcher.newest_epc is None:
continue
if not pd.isnull(home.get("patch")):
epc_searcher.newest_epc["address1"] = add1
# Attempt both methods:
try:
find_epc_searcher = RetrieveFindMyEpc(
@ -104,16 +105,37 @@ class AssetListEpcData:
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error(f"Error retrieving find my epc data: {e}")
find_epc_searcher = RetrieveFindMyEpc(
address=epc_searcher.newest_epc["address1"],
postcode=epc_searcher.newest_epc["postcode"]
)
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
time.sleep(0.5)
if not pd.isnull(home.get("patch")):
epc_searcher.newest_epc["address1"] = add1
try:
    # Second attempt: search with the address held on the newest EPC record
    find_epc_searcher = RetrieveFindMyEpc(
        address=epc_searcher.newest_epc["address1"],
        postcode=epc_searcher.newest_epc["postcode"]
    )
    find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
    # The f prefix is required, otherwise the literal text "{e}" is logged instead of the error
    logger.error(f"Error retrieving find my epc data with alternative address format: {e}")
    # Both lookups failed, so we fall back to the ratings on the EPC record; the remaining fields are left empty
    find_epc_data = {
        "current_epc_rating": epc_searcher.newest_epc["current-energy-rating"],
        "current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"],
        "potential_epc_rating": None,
        "potential_epc_efficiency": None,
        "epc_data": {}
    }
# Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit
time.sleep(random.sample(range(50, 100), 1)[0] / 100)
# Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit
if len(extracted_data) % 50 == 0 and len(extracted_data) > 0:
logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
time.sleep(10)
# We need uprn
to_append = {
"uprn": home.get("uprn"),
"uprn": home.get("uprn", epc_searcher.newest_epc["uprn"]),
"address": home["address"],
"postcode": home["postcode"],
**find_epc_data,
@ -128,6 +150,8 @@ class AssetListEpcData:
}
extracted_data.append(to_append)
epc_data.append(epc_searcher.newest_epc)
self.extracted_data = extracted_data
self.epc_data = epc_data
logger.info("Data Extrction complete")

View file

@ -1,3 +1,4 @@
import time
import re
import pandas as pd
import requests
@ -55,9 +56,11 @@ class RetrieveFindMyEpc:
results = {}
# 1. Total floor area
results['total-floor-area'] = int(self.get_text(
# We have some instances of very old EPCs where the total floor area is not available
tfa = self.get_text(
soup.find("dt", string="Total floor area").find_next_sibling("dd")
).split(" ")[0])
).split(" ")[0]
results['total-floor-area'] = int(tfa) if tfa != "Not" else None
# Table with features
rows = soup.select("table.govuk-table tbody tr")
@ -125,9 +128,156 @@ class RetrieveFindMyEpc:
return results
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
    """
    Pull the ratings, bill texts, recommendations, assessment details and wall types out of one certificate page.

    As a side effect, the wall descriptions found in the property features table are stored on self.walls.

    :param soup: parsed HTML of a single certificate page (queried with find / find_all / find_next)
    :param epc_certificate: certificate identifier, returned unchanged under the 'epc_certificate' key
    :param sap_2012_date: passed straight through to self.format_recommendations
    :return: dictionary holding the ratings, bill texts, formatted recommendations and EPC data, merged with the
        assessment details and the output of self.extract_low_carbon_sources
    :raises ValueError: if any of the expected assessment keys is missing from the page
    """
    # NOTE(review): assumes the description holds two sentences (current, then potential), each ending in the SAP
    # score with the EPC band six words from the end - confirm against a live page
    ratings = soup.find('desc', {'id': 'svg-desc'}).text
    rating_sentences = ratings.split(".")
    current_rating = rating_sentences[0]
    potential_rating = rating_sentences[1]
    current_sap = int(current_rating.split(' ')[-1])
    current_epc = current_rating.split(' ')[-6]

    # Retrieve the energy consumption
    bills = soup.find('div', {'id': 'bills-affected'})
    bills_list = bills.find_all('li') if bills is not None else []
    # If entries are missing, it's usually because the EPC was very old. Early EPCs did not have this information
    heating_text = bills_list[0].text if len(bills_list) > 0 else None
    hot_water_text = bills_list[1].text if len(bills_list) > 1 else None

    # Retrieve the recommendations and SAP points
    recommendations = []
    recommendations_div = soup.find('div', class_='epb-recommended-improvements')
    if recommendations_div:
        # Find all h3 headers for each step and extract their related information
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        previous_sap_score = current_sap
        previous_epc = current_epc
        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")
            # Find the div containing the potential rating within the same section
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )
            # Check if the potential rating div is found
            if potential_rating_div:
                # Extract the rating text within the SVG text element
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
                if extracted_rating_text is not None:
                    rating_text = extracted_rating_text.text.strip()
                else:
                    # No rating shown for this step, so we carry the previous score and band forward
                    rating_text = " ".join([str(previous_sap_score), previous_epc])
                # Parse the rating text to separate the numeric rating and EPC letter
                rating_parts = rating_text.split()
                new_rating = int(rating_parts[0])
                new_epc = rating_parts[1]
                # Append the information as a dictionary to the recommendations list
                recommendations.append({
                    "step": step_num,
                    "measure": measure_title,
                    "new_rating": new_rating,
                    "new_epc": new_epc,
                    "sap_points": new_rating - previous_sap_score
                })
                previous_sap_score = new_rating
                previous_epc = new_epc

    # Search for the assessment information
    assessment_information = soup.find('div', {'id': 'information'})
    # Parse this information. A missing section yields no rows, which is reported by the key check below
    rows = (
        assessment_information.find_all('div', class_='govuk-summary-list__row')
        if assessment_information is not None else []
    )
    # Create a dictionary to hold the parsed information
    assessment_data = {}
    for row in rows:
        key = row.find('dt').text.strip()
        if key == "Type of assessment":
            # We dont reliably extract this
            continue
        value_tag = row.find('dd')
        # Check if value contains a link (email)
        if value_tag.find('a'):
            value = value_tag.find('a').text.strip()
        elif value_tag.find('summary'):
            value = value_tag.find('span').text.strip()
        else:
            value = value_tag.text.strip()
        # These are keys that we have for both the surveyor and the accreditation scheme. The first occurrence is
        # stored against the assessor and the second against the accreditation scheme
        if key in ["Telephone", "Email"]:
            if "Assessor's " + key not in assessment_data:
                assessment_data["Assessor's " + key] = value
            else:
                assessment_data["Accreditation Scheme's " + key] = value
            continue
        assessment_data[key] = value

    expected_keys = [
        'Assessors name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessors ID',
        'Accreditation scheme',
        'Assessors declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate'
    ]
    # Check we have all the expected keys
    for key in expected_keys:
        if key not in assessment_data:
            raise ValueError(f"Missing key: {key}")

    # The wall types of the property
    property_features_body = soup.find("tbody", class_="govuk-table__body")
    property_features_rows = property_features_body.find_all("tr") if property_features_body is not None else []
    # Extract wall types, skipping any row without a header cell or without a data cell
    self.walls = []
    for row in property_features_rows:
        header = row.find("th")
        cells = row.find_all("td")
        if header is not None and cells and header.text.strip() == "Wall":
            self.walls.append(cells[0].text.strip())

    # Finally, we format the recommendations
    recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)

    # 4) Low and zero carbon energy sources
    low_carbon_energy_sources = self.extract_low_carbon_sources(soup)

    # 5) Pull out the EPC data
    epc_data = self.extract_epc_data(soup)

    resulting_data = {
        'epc_certificate': epc_certificate,
        'current_epc_rating': current_epc,
        'current_epc_efficiency': current_sap,
        'potential_epc_rating': potential_rating.split(' ')[-6],
        "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
        "heating_text": heating_text,
        "hot_water_text": hot_water_text,
        "recommendations": recommendations,
        "epc_data": epc_data,
        **assessment_data,
        **low_carbon_energy_sources,
    }
    return resulting_data
def retrieve_all_find_my_epc_data(self, sap_2012_date=None):
"""
For a post code and address, we pull out all the required data from the find my epc website
This is a quick function to retrieve all the data from the find my epc website for a given postcode and address.
Using this to fulfill a short term need to retrieve all history for a property
:param sap_2012_date:
:return:
"""
postcode_input = self.postcode.replace(" ", "+")
@ -182,6 +332,98 @@ class RetrieveFindMyEpc:
address_response = requests.get(chosen_epc, headers=self.HEADERS)
address_res = BeautifulSoup(address_response.text, features="html.parser")
# We check the section on "Other cerificates for this property and get the url"
# Find the section for other certificates
other_cert_section = address_res.find('div', id='other_certificates_and_reports')
# Extract all certificate number rows (anchor tags within a govuk-summary-list)
other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a')
other_certificates = []
for link in other_cert_links:
cert_number = link.text.strip()
cert_url = link['href'].strip()
other_certificates.append({
"certificate_number": cert_number,
"certificate_url": f"https://find-energy-certificate.service.gov.uk{cert_url}"
})
# Always include the currently selected EPC first
soup_list = [address_res]
# Add additional historic certificates
for link in other_cert_links:
cert_url = f"https://find-energy-certificate.service.gov.uk{link['href'].strip()}"
response = requests.get(cert_url, headers=self.HEADERS)
time.sleep(0.3)
soup_list.append(BeautifulSoup(response.text, features="html.parser"))
all_find_my_epc_data = []
for soup in soup_list:
# Start with the primary one
all_find_my_epc_data.append(self._extract_epc_from_soup(soup, epc_certificate, sap_2012_date))
return all_find_my_epc_data
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
"""
For a post code and address, we pull out all the required data from the find my epc website
"""
postcode_input = self.postcode.replace(" ", "+")
postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
postcode_response = requests.get(postcode_search, headers=self.HEADERS)
postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
rows = postcode_res.find_all('tr', class_='govuk-table__row')
extracted_table = []
for row in rows:
# Extract the address and URL
address_tag = row.find('a', class_='govuk-link')
if address_tag is None:
continue
extracted_address = None
extracted_address_url = None
if address_tag:
extracted_address = address_tag.text.strip()
extracted_address_url = address_tag['href']
extracted_address_cleaned = (
extracted_address.replace(",", "").replace(" ", "").lower()
)
if not extracted_address_cleaned.startswith(self.address_cleaned):
continue
# If the address is a match, we can extract the data
# Extract the expiry date
expiry_date_tag = row.find('td', class_='govuk-table__cell date')
expiry_date = None
if expiry_date_tag is not None:
expiry_date = expiry_date_tag.parent.find('span').text.strip()
extracted_table.append(
{
"extracted_address": extracted_address,
"extracted_address_url": extracted_address_url,
"expiry_date": datetime.strptime(expiry_date, '%d %B %Y'),
}
)
if not extracted_table:
raise ValueError("No EPC found")
if len(extracted_table) > 1:
# We take the one with the most recent expiry date
extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True)
chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
epc_certificate = chosen_epc.split('/')[-1]
address_response = requests.get(chosen_epc, headers=self.HEADERS)
address_res = BeautifulSoup(address_response.text, features="html.parser")
# Key data we want to retrieve:
# 1) Rating
# 2) Bills estimates
@ -195,9 +437,6 @@ class RetrieveFindMyEpc:
potential_rating = ratings.split(".")[1]
current_sap = int(current_rating.split(' ')[-1])
# Floor area
address_res.find()
# Retrieve the energy consumption
bills = address_res.find('div', {'id': 'bills-affected'})
bills_list = bills.find_all('li')
@ -432,6 +671,13 @@ class RetrieveFindMyEpc:
"Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
"Heating controls (programmer and thermostatic radiator valves)": [
"roomstat_programmer_trvs", "time_temperature_zone_control"
],
'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
"roomstat_programmer_trvs", "time_temperature_zone_control"
],
"Internal wall insulation": ["internal_wall_insulation"],
"High heat retention storage heaters and dual immersion cylinder and dual rate meter": [
"high_heat_retention_storage_heater"
]
}
@ -466,8 +712,13 @@ class RetrieveFindMyEpc:
find_epc_data = searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error(f"Error retrieving find my epc data: {e}")
if epc["address1"] == epc["address"]:
# There's no benefit of using the same address, so we split on comma
address1 = epc["address"].split(",")[0]
else:
address1 = epc["address1"]
# We attempt with the backup add
searcher = cls(address=epc["address1"], postcode=epc["postcode"])
searcher = cls(address=address1, postcode=epc["postcode"])
find_epc_data = searcher.retrieve_newest_find_my_epc_data()
non_invasive_recommendations = {

View file

@ -194,7 +194,7 @@ class Costs:
IWI_CONTINGENCY = 0.2
# For air source heat pumps, we inflate the assume cost by quite a bit to account for design and installation
ASHP_CONTINGENCY = 0.35
ASHP_CONTINGENCY = 0.25
# Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.2
# When there is less uncertainty, a lower contingency rate is used
@ -871,10 +871,10 @@ class Costs:
if needs_cylinder:
# 1000 is the cost of a new hot water cylinder
total_cost = 1200 * number_heated_rooms + 1000
total_cost = 1300 * number_heated_rooms + 1000
else:
# 500 is the cost of a dual immersion heater - a rough estimate
total_cost = 1200 * number_heated_rooms + 500
total_cost = 1300 * number_heated_rooms + 500
subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
vat = total_cost - subtotal_before_vat

View file

@ -57,6 +57,31 @@ class HeatingRecommender:
},
# These are the heating types we need to produce a dual heating recommendation
"dual": None
},
'Electric underfloor heating, electric storage heaters': {
# For this, we would recommend a heat pump
"dual": None
},
"Room heaters, electric, boiler and radiators, mains gas": {
"hhr": {
"mainheating_description": "Electric storage heaters, radiators",
"recommendation_description": "Install high heat retention electric storage heaters.",
"controls_prefix": ""
},
"boiler": {
"mainheating_description": "Boiler and radiators, mains gas",
"recommendation_description": "Upgrade to a new condensing boiler.",
"controls_suffix": ""
},
"dual": None
},
"Room heaters, electric, electric storage heaters": {
"hhr": {
"mainheating_description": "Electric storage heaters, radiators",
"recommendation_description": "Install high heat retention electric storage heaters.",
"controls_prefix": ""
},
"dual": None
}
}
@ -109,6 +134,10 @@ class HeatingRecommender:
hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters
hhr_suitable = hhr_suitable and (
"underfloor heating" not in self.property.main_heating["clean_description"]
)
return (
hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and
("high_heat_retention_storage_heater" in measures)
@ -165,7 +194,8 @@ class HeatingRecommender:
) and
(not ashp_only_heating_recommendation) and
("boiler_upgrade" in measures) and
(not self.has_ashp)
(not self.has_ashp) and
(not self.property.main_heating["has_warm_air"])
)
return is_valid, has_gas_boiler
@ -487,17 +517,30 @@ class HeatingRecommender:
]
# This is a map from the heating controls description to the description of the air source heat pump set up
ashp_descriptions = {
"Time and temperature zone control": (
f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 or "
"24 hour tariff"
),
"Programmer, TRVs and bypass": (
f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure you "
"have an 18 or 24 hour tariff"
),
}
if ashp_size is None:
ashp_descriptions = {
"Time and temperature zone control": (
f"Install two cascaded air source heat pumps, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 "
"or "
"24 hour tariff"
)
}
else:
ashp_descriptions = {
"Time and temperature zone control": (
f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 "
"or "
"24 hour tariff"
),
"Programmer, TRVs and bypass": (
f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure "
f"you "
"have an 18 or 24 hour tariff"
),
}
new_heating_description = "Air source heat pump, radiators, electric"
new_hot_water_description = "From main system"
@ -924,6 +967,7 @@ class HeatingRecommender:
return recommendations
self.heating_recommendations.extend(recommendations)
return None
@staticmethod
def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):

View file

@ -679,7 +679,7 @@ class Recommendations:
# Handle the case of community schemes
if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
if main_fuel_description == "mains gas (community)":
if main_fuel_description in ["mains gas (community)", "UNKNOWN"]:
return {
"heating_fuel_type": "Natural Gas (Community Scheme)",
"hotwater_fuel_type": "Natural Gas (Community Scheme)",

View file

@ -191,11 +191,22 @@ class RoofRecommendations:
non_invasive_recommendations = self.property.non_invasive_recommendations
# We check a specific condition - which will imply loft insulation isn't appropriate but room in roof
# insulation is
# 1) We have an uninsulated loft (assumed)
# 2) We have a non-intrusive recommendation for room in roof insulation
rir_over_loft = (
self.property.roof["is_pitched"] and
self.property.roof["insulation_thickness"] == "none" and
"room_in_roof_insulation" in [x["type"] for x in non_invasive_recommendations]
)
# We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations
if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or (
self.property.roof["is_pitched"] and "loft_insulation" in measures and
not self.property.roof["is_at_rafters"]
):
) and not rir_over_loft:
self.recommend_roof_insulation(
u_value=u_value,
insulation_thickness=self.insulation_thickness,
@ -223,7 +234,8 @@ class RoofRecommendations:
# There are cases where the property might have a room roof as the second roof, but we have a recommendation for
# it, so we allow this override
if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or (
"room_roof_insulation" in [x["type"] for x in non_invasive_recommendations]
"room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] or
rir_over_loft
):
self.recommend_room_roof_insulation(u_value, phase, default_u_values)
return
@ -502,7 +514,7 @@ class RoofRecommendations:
# and the cost of the materials
rir_non_invasive_recommendation = next(
(x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
(x for x in self.property.non_invasive_recommendations if x["type"] == "room_in_roof_insulation"), {}
)
insulation_materials = pd.DataFrame(self.room_roof_insulation_materials)

View file

@ -10,11 +10,6 @@ class VentilationRecommendations(Definitions):
crucial for prevent overheating risks in warmer months
"""
VENTILATION_DESCRIPTIONS = [
'mechanical, extract only',
'mechanical, supply and extract'
]
def __init__(
self,
property_instance: Property,
@ -26,9 +21,6 @@ class VentilationRecommendations(Definitions):
self.recommendation = None
self.materials = [part for part in materials if part["type"] == "mechanical_ventilation"]
def identify_ventilation(self):
self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS
def recommend(self, phase):
"""
If there is no ventilation, we recommend installing ventilation
@ -38,8 +30,8 @@ class VentilationRecommendations(Definitions):
:return:
"""
self.identify_ventilation()
if self.has_ventilaion:
self.property.identify_ventilation()
if self.property.has_ventilation:
return
if len(self.materials) != 1:

View file

@ -47,19 +47,19 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation):
# We bundle the impact of ventilation with the measure
total = (
rec["total"] + ventilation_recommendation["total"]
if rec["type"] in assumptions.measures_needing_ventilation
if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec["total"]
)
gain = (
rec[goal_key] + ventilation_recommendation[goal_key]
if rec["type"] in assumptions.measures_needing_ventilation
if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec[goal_key]
)
rec_type = (
"+".join(
[rec["type"], ventilation_recommendation["type"]]
) if rec["type"] in assumptions.measures_needing_ventilation
) if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec["type"]
)

View file

@ -49,6 +49,6 @@ class TestLightingRecommendations:
'lighting in all '
'fixed outlets',
'low-energy-lighting': 100},
'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3,
'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4,
'labour_cost': 63.0, 'survey': False}]
'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3,
'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False}
]

View file

@ -0,0 +1,71 @@
"""
This is a script for preparing a sample for testing the end to end process, so that when Spring send us
data, we know it will work.
"""
import pandas as pd
from utils.s3 import read_csv_from_s3
# Load the EPC certificates extract for Birmingham from a local copy
birmingham_epcs = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv"
)

# We get the newest EPC, by UPRN and LODGEMENT_DATE: sorting the lodgement date descending within each UPRN means
# drop_duplicates (which keeps the first occurrence) retains the most recent certificate
birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE'])
birmingham_epcs = birmingham_epcs.sort_values(
    by=['UPRN', 'LODGEMENT_DATE'],
    ascending=[True, False]
).drop_duplicates(subset='UPRN')
# The postal region is the part of the postcode before the first space
birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0]

# Addressable market: EPC D-G, lodged from 2020 onwards, houses/bungalows, privately rented
addressable_market = birmingham_epcs[
    (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])) &
    (birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01') &
    (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) &
    (birmingham_epcs['TENURE'].isin(
        ['rental (private)', 'Rented (private)']
    ))
]

# We take the Spring portfolio and remove the properties in their sample
asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
asset_list = pd.DataFrame(asset_list)
asset_list["postal_region"] = asset_list["postcode"].str.split(" ").str[0]

# Convert the UPRN to its integer string form for the comparison. A plain astype(int) raises as soon as one UPRN is
# missing, so we go through the nullable Int64 dtype instead; a missing UPRN becomes "<NA>" and never matches.
# NOTE(review): presumably asset_list["uprn"] holds strings - confirm against read_csv_from_s3
market_uprns = pd.to_numeric(addressable_market["UPRN"], errors="coerce").astype("Int64").astype(str)
addressable_market = addressable_market[
    ~market_uprns.isin(asset_list["uprn"].values)
]
# Only keep properties in the postal regions the portfolio already covers
addressable_market = addressable_market[
    addressable_market["postal_region"].isin(asset_list["postal_region"].unique())
]
# NOTE(review): addressable_market is not used again in this script; the sample below is drawn from the full
# birmingham_epcs frame - confirm that is intended

# Take a sample of properties, EPC F or G, EPC lodged in 2025. We focus on houses/bungalows
sample = birmingham_epcs[
    (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])) &
    (birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01') &
    (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow']))
]

# Prepare the sample, with just the columns we would expect to receive from Spring
# 1) UPRN
# 2) Address
# 3) Postcode
# 4) Property type
# 5) Built form
# 6) Number of bedrooms (we'll simulate this)
# 7) Number of bathrooms (we'll simulate this)
# 8) Valuation (We'll simulate this, around 200,000)
sample = sample[['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM']].copy()
sample['BEDROOMS'] = 3  # Simulating number of bedrooms
sample['BATHROOMS'] = 1  # Simulating number of bathrooms
sample['VALUATION'] = 200000  # Simulating valuation
sample.columns = [x.lower() for x in sample.columns]

# Store this as an excel file
sample.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx",
    index=False
)

View file

@ -0,0 +1,124 @@
"""
This script prepares the data for the principal pitch modelling
"""
import os
import pandas as pd
from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
load_dotenv(dotenv_path="backend/.env")

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
PORTFOLIO_ID = 206
USER_ID = 8
EPC_TARGET = "C"
# All of the inputs for the plan are stored in the same bucket
BUCKET_NAME = "retrofit-plan-inputs-dev"

# Read the input file
properties = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Birmingham_price_top300.xlsx"
)

# Keep just the D's and below
properties = properties[properties["current_energy_rating"].isin(["D", "E", "F", "G"])].copy()

# Focus on houses
properties = properties[properties["property_type_std"] != "Flat"]
properties = properties[properties["property_type"] != "flat"]

# Rename the key columns
properties = properties.rename(
    columns={
        "address1": "address",
        "number_of_bathrooms": "n_bathrooms",
        "num_beds": "n_bedrooms"
    }
)
properties["patch"] = True

# Pull the non-invasive recommendations
asset_list_epc_client = AssetListEpcData(
    asset_list=properties,
    epc_auth_token=EPC_AUTH_TOKEN
)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
asset_list_epc_client.get_patch()

extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data)
epc_df = pd.DataFrame(asset_list_epc_client.epc_data)

# Find examples where patches are different to the api
compare_rows = []
for patch in asset_list_epc_client.patches:
    # NOTE(review): squeeze() only yields a single record when exactly one row matches the uprn - confirm that
    # uprns are unique in both frames
    extracted = extracted_df[extracted_df["uprn"] == patch["uprn"]].squeeze()
    epc = epc_df[epc_df["uprn"] == patch["uprn"]].squeeze()
    compare_rows.append(
        {
            "uprn": extracted["uprn"],
            "address": extracted["address"],
            "postcode": extracted["postcode"],
            "api_epc": int(extracted["current_epc_efficiency"]),
            "fme_epc": int(epc["current-energy-efficiency"]),
        }
    )

# We name the columns explicitly, so that the comparison below still works when there are no patches at all (an
# empty DataFrame would otherwise have no "api_epc" column and raise a KeyError)
compare_epc = pd.DataFrame(
    compare_rows,
    columns=["uprn", "address", "postcode", "api_epc", "fme_epc"]
)
diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]]

# Compare matched addresses to make sure they are the same
compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge(
    epc_df[["uprn", "address1", "postcode"]].rename(columns={"address1": "epc_address1", "postcode": "epc_postcode"}),
    how="left",
    on=["uprn"]
)

# Add on uprn
properties = properties.merge(
    extracted_df[["address", "postcode", "uprn"]],
    how="left",
    on=["address", "postcode"]
)

# Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
save_csv_to_s3(
    dataframe=properties,
    bucket_name=BUCKET_NAME,
    file_name=filename
)

# Store non-invasive recommendations in S3
non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
save_csv_to_s3(
    dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations),
    bucket_name=BUCKET_NAME,
    file_name=non_invasive_recommendations_filename
)

# Store patches in S3. When there are none, nothing is uploaded and the path is left blank in the request body
patches_filename = ""
if asset_list_epc_client.patches:
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(asset_list_epc_client.patches),
        bucket_name=BUCKET_NAME,
        file_name=patches_filename
    )

# The request body, which is only printed by this script
body = {
    "portfolio_id": str(PORTFOLIO_ID),
    "housing_type": "Private",
    "goal": "Increasing EPC",
    "goal_value": EPC_TARGET,
    "trigger_file_path": filename,
    "already_installed_file_path": "",
    "patches_file_path": patches_filename,
    "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
    "valuation_file_path": "",
    "scenario_name": f"EPC {EPC_TARGET}",
    "multi_plan": True,
    "budget": None,
    "ashp_cop": 3.5,
    # This is new - when optimising, we drop scores by a few points to account for SAP 10
    "simulate_sap_10": True,
    "exclusions": ["external_wall_insulation"],
    "required_measures": ["cavity_wall_insulation", "loft_insulation"]
}
print(body)

View file

@ -0,0 +1,224 @@
"""
This script prepares the data for the financial model
"""
import pandas as pd
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
# The portfolio to export; get_data matches this against PropertyModel.portfolio_id
PORTFOLIO_ID = 206
# The scenarios to export; get_data matches these against Plan.scenario_id
SCENARIOS = [389]
def _columns_as_dict(instance, model):
    """Return {column name: value} for every table column of model, read from instance."""
    return {col.name: getattr(instance, col.name) for col in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """
    Pull the properties, plans and default recommendations that feed the financial model.

    :param portfolio_id: id of the portfolio whose properties (joined to their EPC details) are returned
    :param scenario_ids: list of scenario ids. Plans are selected by scenario only, they are not filtered by the
        properties of the portfolio
    :return: tuple (properties_data, plans_data, recommendations_data), each a list of dicts keyed by column name.
        Property dicts merge the PropertyModel and PropertyDetailsEpcModel columns (the EPC details win where both
        tables share a column name). Recommendation dicts carry an additional "Scenario ID" key
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
        ).all()

        # Transform properties data to include all fields dynamically
        properties_data = [
            {
                **_columns_as_dict(prop.PropertyModel, PropertyModel),
                **_columns_as_dict(prop.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for prop in properties_query
        ]

        # Get the plans that belong to the requested scenarios
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()

        # Transform plans data to include all fields dynamically
        plans_data = [_columns_as_dict(plan, Plan) for plan in plans_query]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that are default
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # noqa: E712 - this builds a SQL comparison, not a Python one
        ).all()

        # Transform recommendations data to include all fields dynamically and include scenario_id. Each result is a
        # (Recommendation, scenario_id) row, so the entity is always available as rec.Recommendation
        recommendations_data = [
            {
                **_columns_as_dict(rec.Recommendation, Recommendation),
                "Scenario ID": rec.scenario_id,
            }
            for rec in recommendations_query
        ]
    finally:
        # Always close the session, even if one of the queries raises
        session.close()

    return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)

properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)

# Everything below only looks at the default recommendations
default_recommendations = recommendations_df[recommendations_df["default"]]

# Cost of each recommended measure, one row per recommendation
recommended_measures_df = default_recommendations[["property_id", "measure_type", "estimated_cost"]]

# Sum up the sap points by property id
post_install_sap = default_recommendations.groupby("property_id")[["sap_points"]].sum().reset_index()

# One row per property, one column per measure type, holding the estimated cost of that measure
recommendations_measures_pivot = recommended_measures_df.pivot(
    index='property_id',
    columns='measure_type',
    values='estimated_cost'
).reset_index()

# Total cost is the row sum, excluding the property_id column
measure_columns = [column for column in recommendations_measures_pivot.columns if column != "property_id"]
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot[measure_columns].sum(axis=1)

# Property details, with the measure costs and the summed sap points attached
property_columns = [
    "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
    "current_epc_rating",
    "current_sap_points", "total_floor_area", "number_of_rooms",
]
df = properties_df[property_columns]
df = df.merge(recommendations_measures_pivot, how="left", on="property_id")
df = df.merge(post_install_sap, how="left", on="property_id")
df = df.drop(columns=["property_id"])

# Properties without any recommendation gain no points
df["sap_points"] = df["sap_points"].fillna(0)
df["predicted_post_works_sap"] = (df["current_sap_points"] + df["sap_points"]).round()
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)

# We merge this back to the main dataframe, which will contain the bathrooms
from utils.s3 import read_csv_from_s3

asset_list = pd.DataFrame(
    read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
)

# The merge key is cast to a string on the database side before joining
df["uprn"] = df["uprn"].astype(str)
# These columns are dropped from the database side before the merge, so they are not duplicated onto the asset list
columns_not_merged = ["address", "postcode", "property_type", "total_floor_area"]
asset_list = asset_list.merge(
    df.drop(columns=columns_not_merged),
    how="left",
    on="uprn"
)

condition_costs = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
    sheet_name="Prices - Khalim",
    header=35
)

# Remove unnamed columns and reset index
is_named_column = ~condition_costs.columns.str.contains('^Unnamed')
condition_costs = condition_costs.loc[:, is_named_column].reset_index(drop=True)

# We now estimate condition cost
def simulate_condition(asset_list, condition_costs):
    """
    This function is for testing, and will simulate the condition cost for every condition score from 1-10 for each
    property, to see what the costing array looks like.

    For each score, the matching row of condition_costs (without the "Condition" column) is scaled by the floor area
    of the property, the "Bathroom" element is additionally multiplied by the number of bathrooms, and the elements
    are summed into a single total.

    :param asset_list: DataFrame with one row per property. Needs "uprn", "total_floor_area" and the number of
        bathrooms in "n_bathrooms" (the older "bathrooms" column is used when "n_bathrooms" is not present)
    :param condition_costs: DataFrame with a "Condition" column containing the scores 1-10, a "Bathroom" column and
        any other cost columns
    :return: asset_list, left merged on "uprn" with the columns "Condition 10" down to "Condition 1"
    :raises IndexError: if condition_costs has no row for one of the scores 1-10
    """
    # The rest of the pipeline names this column n_bathrooms, we keep the old name as a fallback
    bathrooms_column = "n_bathrooms" if "n_bathrooms" in asset_list.columns else "bathrooms"

    # The costs for a score do not depend on the property, so we look them up once rather than once per row
    unit_costs = {
        condition: condition_costs[
            condition_costs["Condition"] == condition
        ].drop(columns=["Condition"]).iloc[0]
        for condition in reversed(range(1, 11))
    }

    condition_rows = []
    for _, row in asset_list.iterrows():
        # Cast to float, as the main condition cost calculation does, in case the values were read in as text
        floor_area = float(row["total_floor_area"])
        n_bathrooms = float(row[bathrooms_column])

        conditions = {}
        for condition, unit_cost in unit_costs.items():
            # Each cost is scaled by floor area (this creates a new Series, the lookup itself is not modified)
            condition_cost = unit_cost * floor_area
            condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
            conditions["Condition " + str(condition)] = condition_cost.sum()

        condition_rows.append(
            {
                "uprn": row["uprn"],
                **conditions
            }
        )

    condition_df = pd.DataFrame(condition_rows)
    return asset_list.merge(
        condition_df,
        how="left",
        on="uprn"
    )
# Uncomment to simulate the cost of every condition score for each property instead
# asset_list = simulate_condition(asset_list, condition_costs)

# We calculate the condition cost based on the condition.
# We make sure the output column exists up front, so that the export and the comparison below still work when no
# property has a condition score
if "domna_condition_cost" not in asset_list.columns:
    asset_list["domna_condition_cost"] = float("nan")

for _, row in asset_list.iterrows():
    condition = row["condition_score"]
    # Skip properties without a score. A missing value can come through as None, NaN or an empty string
    if pd.isna(condition) or condition == "":
        continue
    # The score may come through as text such as "3.0", so we go via float
    condition = int(float(condition))

    condition_cost = condition_costs[
        condition_costs["Condition"] == condition
    ].drop(columns=["Condition"]).iloc[0]

    # Each cost is scaled by floor area
    condition_cost = condition_cost * float(row["total_floor_area"])
    n_bathrooms = row["n_bathrooms"]
    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
    total_condition_cost = condition_cost.sum()

    # The cost is written to every row that shares this uprn
    asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost

# Store output
asset_list.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
    index=False
)

# NOTE(review): "decoration_sum_min " has a trailing space - presumably this matches the header in the source data,
# confirm before changing it
condition_cost_comparison = asset_list[
    ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
]